?? skl_dct_llm.cpp

?? LLM fast dct algorithm
?? CPP
?? 第 1 頁 / 共 2 頁
字號:
12 下一頁
/* *  skl_dct_LLM.cpp * * LLM implementation of Forward Discrete Cosine Transform * designed for [0..255] input only. * * See: *  Loeffler C., Ligtenberg A., and Moschytz C.S.:  *    Practical Fast 1D DCT Algorithm with Eleven Multiplications,  *  Proc. ICASSP 1989, 988-991. * * (C) 2002 Pascal Massimino * details at http://skal.planet-d.net/coding/dct.html ********************************************************/#include <math.h>#include <stdio.h>typedef short SKL_INT16;typedef long  SKL_INT32;#define IN_TYPE  SKL_INT16#define OUT_TYPE SKL_INT16#define TMP_TYPE SKL_INT32//////////////////////////////////////////////////////////#define SHIFTL(m,n) ((m)<<(n))#define SHIFTRH(m,n) ( ( (m)+HALF(n) ) >> (n) )#define FX(x,n) ((IN_TYPE)floor((x)*(1<<(n)) + .5))#define HALF(n) (1<<((n)-1))#define RMULT(x, y, n) ( (IN_TYPE)(((TMP_TYPE)(x)*(TMP_TYPE)(y))>>(n)) )#define PMULHW(x, y) ( (IN_TYPE)( ((TMP_TYPE)(x)*(TMP_TYPE)(y))>>16 ) )#define RMULT3(x, y, n) ( ( PMULHW(x,y) + HALF((n)-16) ) >> ((n)-16) )#define SWAP(m1,m2,tmp) (tmp)=(m1); (m1)=(m2); (m2)=(tmp)#define STORE(a, m)   In[(a)*8] = (OUT_TYPE)(m);#define LOAD(m, a)    (m) = (IN_TYPE) In[(a)*8]#define LOAD_BUTF(m1, m2, a, b) \  (m1) = (IN_TYPE)In[(a)*8] - (IN_TYPE)In[(b)*8]; \  (m2) = (IN_TYPE)In[(a)*8] + (IN_TYPE)In[(b)*8]#define BUTF(a, b, tmp) \  (tmp) = (a)+(b);      \  (a)   = (a)-(b);      \  (b)   = (tmp)#define BUTFSAFE(a, b, tmp) \  (tmp) = (IN_TYPE)( ((b)+(a)+1) >> 1 ); \  (a)   = (IN_TYPE)( ((a)-(b)+1) >> 1 ); \  (b)   = (tmp)#define ROTATE(a, b, Rot, tmp)        \  (tmp) = RMULT( (a), (Rot), 15 ) - (b); \  (a)   = (a) + RMULT( (b), (Rot), 15 ); \  (b)   = (tmp)#define ROTATE16(a, b, Rot, tmp)         \  (tmp)= PMULHW( (a), (Rot) ) - (b); \  (a)  = (a) + PMULHW( (b), (Rot) ); \  (b)  = (tmp)#define ROTATE15(a, b, Rot, tmp)           \  (tmp)= (b) - (a) - PMULHW( (a), (Rot) ); \  (a)  = (a) + (b) + PMULHW( (b), (Rot) ); \  (b)  = (tmp)//////////////////////////////////////////////////////////static const IN_TYPE tan_1_16 = FX( tan(1.*M_PI/16), 15 );static const IN_TYPE tan_1_16_16b = FX( tan(1.*M_PI/16), 16 );static const IN_TYPE tan_2_16 = FX( tan(2.*M_PI/16), 15 );static const IN_TYPE tan_2_16_16b = FX( sqrt(2.0)-1., 16 );static const IN_TYPE tan_3_16 = FX( tan(3.*M_PI/16), 15 );static const IN_TYPE tan_3_16_16b = FX( tan(3.*M_PI/16)-1., 16 );static const IN_TYPE cos_4_16  = FX( cos(4.*M_PI/16), 15 );static const IN_TYPE ucos_4_16  = 0xb505; // unsignedstatic const IN_TYPE ocos_4_16 = FX( cos(4.*M_PI/16)-.5, 16 );static const IN_TYPE cos_4_16_16b = FX( cos(4.*M_PI/16)-.5, 17 );static const IN_TYPE cos3_cos1  = FX( cos(3.*M_PI/16)/cos(1.*M_PI/16), 15 );static const IN_TYPE ocos3_cos1 = FX( cos(3.*M_PI/16)/cos(1.*M_PI/16) - .5, 16 );static const double c1 = cos(1.*M_PI/16);static const double s1 = sin(1.*M_PI/16);static const double t1 = tan(1.*M_PI/16);static const double c2 = cos(2.*M_PI/16);static const double s2 = sin(2.*M_PI/16);static const double c3 = cos(3.*M_PI/16);static const double s3 = sin(3.*M_PI/16);static const double t3 = tan(3.*M_PI/16);static const double c4 = cos(4.*M_PI/16);static const double c6 = cos(6.*M_PI/16);#define PASSB  2static void DCT_FWD_PASS1(OUT_TYPE *In){  IN_TYPE mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;  IN_TYPE Spill;  LOAD_BUTF(mm7, mm0, 0, 7);  LOAD_BUTF(mm4, mm3, 3, 4);  LOAD_BUTF(mm6, mm1, 1, 6);  LOAD_BUTF(mm5, mm2, 2, 5);  BUTF( mm0, mm3, Spill );  BUTF( mm1, mm2, Spill );  BUTF( mm3, mm2, Spill );  mm0 = SHIFTL(mm0, PASSB);  mm1 = SHIFTL(mm1, PASSB);  mm2 = SHIFTL(mm2, PASSB);  mm3 = SHIFTL(mm3, PASSB);  STORE(0, mm2);  STORE(4, mm3);    // Rot6  Spill = PMULHW(mm0, tan_2_16_16b) + mm0;  mm0 = mm0+mm1;  mm1 = (Spill - mm0) | 1;  mm0 = (Spill + mm0) | 1;  STORE(2, mm0);  STORE(6, mm1);    // Rot1/3  mm4 = SHIFTL(mm4, PASSB+1);  mm5 = SHIFTL(mm5, PASSB);  mm6 = SHIFTL(mm6, PASSB);  mm7 = SHIFTL(mm7, PASSB+1);  BUTF( mm6, mm5, Spill );  mm6 = PMULHW( mm6, cos_4_16_16b ) + mm6;  mm5 = PMULHW( mm5, cos_4_16_16b ) + mm5;  mm5 |= 1;  mm6 |= 1;  BUTF( mm7, mm5, Spill );  BUTF( mm4, mm6, Spill );  ROTATE16( mm5, mm6, tan_1_16_16b, Spill );  ROTATE15( mm4, mm7, tan_3_16_16b, Spill );  STORE(1, mm5);  STORE(3, mm7);  STORE(5, mm4);  STORE(7, mm6);}static void DCT_FWD_PASS2(OUT_TYPE *In){  IN_TYPE mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7;  IN_TYPE Spill;  LOAD_BUTF(mm7, mm0, 0, 7);  LOAD_BUTF(mm4, mm3, 3, 4);  LOAD_BUTF(mm6, mm1, 1, 6);  LOAD_BUTF(mm5, mm2, 2, 5);  BUTF( mm0, mm3, Spill );  BUTF( mm1, mm2, Spill );//  BUTF( mm3, mm2, Spill );   mm2 >>= 1;  mm3 >>= 1;  Spill = mm2;  mm2 = mm3+Spill;  mm3 = mm3-Spill;    STORE(0, mm2);  STORE(4, mm3);    //  Rot6  Spill = PMULHW(mm0, tan_2_16_16b) + mm0;  mm0 = mm0+mm1;  mm1 = (Spill - mm0) | 1;  mm0 = (Spill + mm0) | 1;  STORE(2, mm0);  STORE(6, mm1);    // Rot1/3  BUTF( mm6, mm5, Spill );  mm6 = PMULHW( mm6, ucos_4_16 ) + mm6;  mm5 = PMULHW( mm5, ucos_4_16 ) + mm5;  mm5 |= 1;  BUTF( mm7, mm5, Spill );  BUTF( mm4, mm6, Spill );  ROTATE16( mm5, mm6, tan_1_16_16b, Spill );  ROTATE15( mm4, mm7, tan_3_16_16b, Spill );  STORE(1, mm5);  STORE(3, mm7);  STORE(5, mm4);  STORE(7, mm6);}static IN_TYPE Final[64] = {  FX(c4*c4, 15)-1, FX(c4*c1, 14), FX(c4*c6, 15), FX(c4*c3, 14), FX(c4*c4, 15)-1, FX(c4*c3, 14), FX(c4*c2, 15), FX(c4*c1, 14),  FX(c1*c4, 14),   FX(c1*c1, 13), FX(c1*c6, 14), FX(c1*c3, 13), FX(c1*c4, 14),   FX(c1*c3, 13), FX(c1*c2, 14), FX(c1*c1, 13),  FX(c6*c4, 14),   FX(c6*c1, 13), FX(c6*c6, 14), FX(c6*c3, 13), FX(c6*c4, 14),   FX(c6*c3, 13), FX(c6*c2, 14), FX(c6*c1, 13),  FX(c3*c4, 14),   FX(c3*c1, 13), FX(c3*c6, 14), FX(c3*c3, 13), FX(c3*c4, 14),   FX(c3*c3, 13), FX(c3*c2, 14), FX(c3*c1, 13),  FX(c4*c4, 15)-1, FX(c4*c1, 14), FX(c4*c6, 15), FX(c4*c3, 14), FX(c4*c4, 15)-1, FX(c4*c3, 14), FX(c4*c2, 15), FX(c4*c1, 14),   FX(c3*c4, 14),   FX(c3*c1, 13), FX(c3*c6, 14), FX(c3*c3, 13), FX(c3*c4, 14),   FX(c3*c3, 13), FX(c3*c2, 14), FX(c3*c1, 13),  FX(c2*c4, 14),   FX(c2*c1, 13), FX(c2*c6, 14), FX(c2*c3, 13), FX(c2*c4, 14),   FX(c2*c3, 13), FX(c2*c2, 14), FX(c2*c1, 13),  FX(c1*c4, 14),   FX(c1*c1, 13), FX(c1*c6, 14), FX(c1*c3, 13), FX(c1*c4, 14),   FX(c1*c3, 13), FX(c1*c2, 14), FX(c1*c1, 13)};static void Print_Cst() {  printf( "Mults dw" );  for(int k=0; k<64; ++k) {    printf( " 0x%.4x,", Final[k]>>PASSB );    if ((k&7)==7) if (k!=63) printf( "\n      dw"); else printf("\n");  }  printf( "Halves dw" );   for(int l=0; l<64; ++l) {     IN_TYPE M = Final[l]>>PASSB; IN_TYPE H = HALF(16)/M;    printf( " 0x%.4x,", H );    if ((l&7)==7) if (l!=63) printf( "\n       dw"); else printf("\n");  }  printf( "tan_1_16_16b = 0x%x\n", tan_1_16_16b);  printf( "tan_2_16_16b = 0x%x\n", tan_2_16_16b);  printf( "tan_3_16_16b = 0x%x\n", tan_3_16_16b);  printf( "cos_4_16_16b = 0x%x\n", cos_4_16_16b);}static void FINISH(OUT_TYPE *In){//  Print_Cst();  for(int i=0; i<64; ++i) {    IN_TYPE M = Final[i]>>PASSB;    IN_TYPE H = HALF(16)/M;    In[i] = (OUT_TYPE)PMULHW( In[i]+H, M );  }}///////////////////////////////////////////////////////////*IEEE1180-like Errors specs:Peak error:   1.0000Peak MSE:     0.1813Overall MSE:  0.0502Peak ME:      0.1813Overall ME:   -0.0214*/static void TRANSPOSE( OUT_TYPE *In ){  for(int i=0; i<8; ++i)    for(int j=i+1; j<8; ++j) {      SKL_INT16 Tmp = In[i+8*j];      In[i+8*j] = In[j+8*i];      In[j+8*i] = Tmp;    }}void Skl_Dct16_LLM( OUT_TYPE *In ){  int i;  TRANSPOSE(In);  for(i=0; i<8; ++i) DCT_FWD_PASS1(In+i);  TRANSPOSE(In);  for(i=0; i<8; ++i) DCT_FWD_PASS2(In+i);  FINISH(In);}////////////////////////////////////////////////////////////   NASM source///////////////////////////////////////////////////////////*; [BITS 32];//////////////////////////////////////////////////////////////////////SECTION .dataalign 16Mults dw 0x0fff, 0x0b18, 0x08a8, 0x0968, 0x0fff, 0x0968, 0x14e7, 0x0b18,      dw 0x0b18, 0x07b2, 0x0601, 0x0686, 0x0b18, 0x0686, 0x0e7f, 0x07b2,      dw 0x0454, 0x0300, 0x0257, 0x028b, 0x0454, 0x028b, 0x05a8, 0x0300,      dw 0x0968, 0x0686, 0x0517, 0x0587, 0x0968, 0x0587, 0x0c4a, 0x0686,      dw 0x0fff, 0x0b18, 0x08a8, 0x0968, 0x0fff, 0x0968, 0x14e7, 0x0b18,      dw 0x0968, 0x0686, 0x0517, 0x0587, 0x0968, 0x0587, 0x0c4a, 0x0686,      dw 0x0a73, 0x073f, 0x05a8, 0x0625, 0x0a73, 0x0625, 0x0da8, 0x073f,      dw 0x0b18, 0x07b2, 0x0601, 0x0686, 0x0b18, 0x0686, 0x0e7f, 0x07b2Halves dw 0x0008, 0x000b, 0x000e, 0x000d, 0x0008, 0x000d, 0x0006, 0x000b,       dw 0x000b, 0x0010, 0x0015, 0x0013, 0x000b, 0x0013, 0x0008, 0x0010,       dw 0x001d, 0x002a, 0x0036, 0x0032, 0x001d, 0x0032, 0x0016, 0x002a,       dw 0x000d, 0x0013, 0x0019, 0x0017, 0x000d, 0x0017, 0x000a, 0x0013,       dw 0x0008, 0x000b, 0x000e, 0x000d, 0x0008, 0x000d, 0x0006, 0x000b,       dw 0x000d, 0x0013, 0x0019, 0x0017, 0x000d, 0x0017, 0x000a, 0x0013,       dw 0x000c, 0x0011, 0x0016, 0x0014, 0x000c, 0x0014, 0x0009, 0x0011,       dw 0x000b, 0x0010, 0x0015, 0x0013, 0x000b, 0x0013, 0x0008, 0x0010tan_1_16_16b times 8 dw 0x32ectan_2_16_16b times 8 dw 0x6a0atan_3_16_16b times 8 dw 0xab0eucos_4_16_16b times 8 dw 0xb505Rounder times 8 dw 0x0001Spill   times 8 dw 0;//////////////////////////////////////////////////////////////////////SECTION .text
12 下一頁
?? 文件大小 5 K
?? 上傳用戶 qq448792326
?? 所屬分類壓縮解壓
??? 相關標簽

#algorithm #fast #LLM #dct
?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

?? skl_dct_llm.cpp

?? 快捷鍵說明