?? skl_mpg4_enc.cpp
字號:
/******************************************************** * Some code. Copyright (C) 2003 by Pascal Massimino. * * All Rights Reserved. (http://skal.planet-d.net) * * For Educational/Academic use ONLY. See 'LICENSE.TXT'.* ********************************************************//* * skl_mpg4_enc.cpp * * MPEG4 encoder ********************************************************/#include "./skl_mpg4i.h"#include "skl_syst/skl_exception.h"static const char * const ID_STRING = "sklmp4 004";//////////////////////////////////////////////////////////#define ABS(x) (((x)<0) ? -(x) : (x))#define DIV_ROUND(x,y) ( (x)>=0 ? ((x)+((y)>>1))/(y) : ((x)-((y)>>1))/(y) )//////////////////////////////////////////////////////////// Encoding tables//////////////////////////////////////////////////////////struct SKL_VLC { SKL_INT16 Val, Len; }; // Table B-12static const SKL_VLC MV_B12_Tab[32+1+32] = { { 5, 13}, { 7, 13}, { 5, 12}, { 7, 12}, { 9, 12}, {11, 12}, {13, 12}, {15, 12}, { 9, 11}, {11, 11}, {13, 11}, {15, 11}, {17, 11}, {19, 11}, {21, 11}, {23, 11}, {25, 11}, {27, 11}, {29, 11}, {31, 11}, {33, 11}, {35, 11}, {19, 10}, {21, 10}, {23, 10}, { 7, 8}, { 9, 8}, {11, 8}, { 7, 7}, { 3, 5}, { 3, 4}, { 3, 3}, { 1, 1}, { 2, 3}, { 2, 4}, { 2, 5}, { 6, 7}, {10, 8}, { 8, 8}, { 6, 8}, {22, 10}, {20, 10}, {18, 10}, {34, 11}, {32, 11}, {30, 11}, {28, 11}, {26, 11}, {24, 11}, {22, 11}, {20, 11}, {18, 11}, {16, 11}, {14, 11}, {12, 11}, {10, 11}, { 8, 11}, {14, 12}, {12, 12}, {10, 12}, { 8, 12}, { 6, 12}, { 4, 12}, { 6, 13}, { 4, 13}}; // Table B-6static const SKL_VLC MCBPC_Intra_B6[2][4] = { // index: [MB_Type-3][Cbpc] { {1, 1}, {1, 3}, {2, 3}, {3, 3} } // MB_Type = 3, Cbpc=00 .. 11, { {1, 4}, {1, 6}, {2, 6}, {3, 6} } // MB_Type = 4, Cbpc=00 .. 11}; // Table B-7static const SKL_VLC MCBPC_Inter_B7[5][4] = { // index: [MB_Type][Cbpc] { {1, 1}, {3, 4}, {2, 4}, {5, 6} }, { {3, 3}, {7, 7}, {6, 7}, {5, 9} }, { {2, 3}, {5, 7}, {4, 7}, {5, 8} }, { {3, 5}, {4, 8}, {3, 8}, {3, 7} }, { {4, 6}, {4, 9}, {3, 9}, {2, 9} }};static const SKL_VLC CBPY_B8[16] = { // index: Cbpy {3, 4}, {5, 5}, {4, 5}, { 9, 4}, {3, 5}, {7, 4}, {2, 6}, {11, 4}, {2, 5}, {3, 6}, {5, 4}, {10, 4}, {4, 4}, {8, 4}, {6, 4}, { 3, 2}}; // Table B13/B14: DC-diff code, indexed by [size-1]. // Bits for |DC| are blanked. // Final Marker bit included for Size>8static const SKL_VLC DCY_Tab_B13[12] = { { 6, 3}, { 8, 4}, {16, 6}, {16, 7}, {32, 9}, {64, 11}, {128, 13}, {256, 15}, {1025, 18}, {4097, 21}, {8193, 23}, {8193, 24}}; static const SKL_VLC DCC_Tab_B14[12] = { { 4, 3}, { 4, 4}, { 8, 6}, {16, 8}, {32, 10}, {64, 12}, {128, 14}, {256, 16}, {1025, 19}, {2049, 21}, {4097, 23}, {8193, 25}}; // Table 6-27static const int DQuant_Tab[5] = { 1, 0, -1 /*error*/, 2, 3 }; // Table 6-33static const SKL_VLC Spr_Tab_B33[15] = { { 0, 2}, { 2, 3}, { 3, 3}, { 4, 3}, { 5, 3}, { 6, 3}, { 14, 4}, { 30, 5}, { 62, 6}, { 126, 7}, { 254, 8}, {510, 9}, {1022,10}, {2046,11}, {4094,12}}; //////////////////////////////////////////////////////////// MV coding//////////////////////////////////////////////////////////static void Write_Vector_Comp(SKL_FBB * const Bits, SKL_INT16 Val, SKL_INT32 Fix){ const int High = 16 << Fix; SKL_ASSERT(Val>=-2*High && Val<2*High); if (Val < -High) Val += 2*High; else if (Val >= High) Val -= 2*High; if (!Val) Bits->Put_Bits(1, 1); // 0 case hardcoded else { if (--Fix) { const SKL_UINT32 Sign = (Val<0); if (Sign) Val ^= -1; else Val -= 1; const int Residue = Val & SKL_BMASKS::And[Fix]; Val >>= Fix; SKL_ASSERT(Val>=0 && Val<=31); Bits->Put_Bits( MV_B12_Tab[33+Val].Val | Sign, MV_B12_Tab[33+Val].Len ); Bits->Put_Bits( Residue, Fix ); } else { SKL_ASSERT(Val>=-32 && Val<=32); Bits->Put_Bits( MV_B12_Tab[Val+32].Val, MV_B12_Tab[Val+32].Len ); } } }static void Write_Vector(SKL_FBB * const Bits, const SKL_MV MV, SKL_INT32 Fix){ Write_Vector_Comp(Bits, MV[0], Fix); Write_Vector_Comp(Bits, MV[1], Fix);}//////////////////////////////////////////////////////////// AC coeff coding. //// Description of the matrices Code[Run][Level]:// Codable {run,level} combinaisons ('x') are packed on the// upper left corner. Actually, the matrix looks like:// +-----------------[level]// |xxxxxxxxxxx...... <-// |xxxxxxxx......... <- max_level[run]// |xxxx............. <-// |xx...............// |x................// |x................// |.................// |.................//[run]^^^^___ max_run[level]//// Uncharted coeffs ('.') need being coded with escape code.// If you use Esc-1 coding, you will jump on the left by// max_level[run] steps, in hope you fall on a 'x' coded// combinaison. With Esc-2 coding, you will move up by// max_run[level]+1 steps. If neither works, you have to// fall back to expensive Esc-3 coding.//////////////////////////////////////////////////////////#include "./skl_mpg4_enc_tbl.h"static inline void Write_DC_Coeff(SKL_FBB * const Bits, SKL_INT16 DC, const int Lum){ if (DC) { const int Sign = (DC<0); if (Sign) DC = -DC; int Size = SKL_BMASKS::Log2(DC); SKL_ASSERT(Size>=1 && Size<=12); if (Sign) DC ^= SKL_BMASKS::And[Size]; const SKL_VLC *VLC = Lum ? &DCY_Tab_B13[Size-1] : &DCC_Tab_B14[Size-1]; if (Size>8) DC <<= 1; // make room for marker bit Bits->Put_Bits( VLC->Val | DC, VLC->Len ); } else { if (Lum) Bits->Put_Bits( 3, 3 ); else Bits->Put_Bits( 3, 2 ); }}static inline void Encode_Coeff(SKL_FBB *Bits, int Level, const SKL_UINT32 * const Tab){ SKL_ASSERT(Level!=0 && Level>=-2048 && Level<=2047); if (!((Level+64)&-128)) { // Level is in [-64,63] -> use table const SKL_UINT32 Code = Tab[Level]; if (0<=(SKL_INT32)Code) // !bit31? -> it's a regular 21bits-max code Bits->Put_Bits( Code&0x00ffffff, Code>>24 ); else // otherwise, code is actually a 30bits-Esc3 Bits->Put_Bits( Code&0x7fffffff, 7+2+1+6+1+12+1 ); } else { // Esc3 encoding (30bits total) for level not in [-64,63] // Tab[0] contains the esc3 code base: 0x01e02001 | (last<<20) | (run<<14) const SKL_UINT32 Code = Tab[0] | ((Level&0xfff)<<1); Bits->Put_Bits( Code, 7+2+1+6+1+12+1 ); }}static inline void Write_Coeffs(SKL_FBB * const Bits, const SKL_INT16 *C, const int Zigzag[], int Last, const SKL_UINT32 Tabs[2][64][128]){ Zigzag += Last; int j = -Last; while(1) { int Level; do Level = C[Zigzag[j++]]; while (!Level); if (j<=0) { Encode_Coeff( Bits, Level, &Tabs[0][Last+j-1][64] ); Last = -j; } else { Encode_Coeff( Bits, Level, &Tabs[1][Last][64] ); break; } }}static inline int Find_Last(const SKL_INT16 C[6*64], const int *Zigzag, int i){ while(i>=0) if (C[Zigzag[i]]) return i; else i--; return -1;}////////////////////////////////////////////////////////////// Trellis-Based quantization//// So far I understand this paper://// "Trellis-Based R-D Optimal Quantization in H.263+"// J.Wen, M.Luttrell, J.Villasenor// IEEE Transactions on Image Processing, Vol.9, No.8, Aug. 2000.//// we are at stake with a simplified Bellman-Ford / Dijkstra Single// Source Shortest Path algo. But due to the underlying graph structure// ("Trellis"), it can be turned into a dynamic programming algo,// partially saving the explicit graph's nodes representation. And // without using a heap, since the open frontier of the DAG is always// known, and of fixed size.////////////////////////////////////////////////////////////#define DBG 0SKL_UINT32 SKL_MP4_ENC_I::Evaluate_Cost(const SKL_INT16 *C, const int * const Zigzag, int Max, int Lambda) const{#if (DBG>0) const SKL_INT16 * const Ref = C + 6*64; int Last = Max; while(Last>=0 && C[Zigzag[Last]]==0) Last--; int Bits = 0; if (Last>=0) { Bits = 2; // CBP int j=0, j0=0; int Run, Level; while(j<Last) { while(!C[Zigzag[j]]) j++; if (j==Last) break; Level=C[Zigzag[j]]; Run = j - j0; j0 = ++j; if (Level>=-24 && Level<=24) Bits += B16_17_Code_Len[(Level<0) ? -Level-1 : Level-1][Run]; else Bits += 30; } Level = C[Zigzag[Last]]; Run = j - j0; if (Level>=-6 && Level<=6) Bits += B16_17_Code_Len_Last[(Level<0) ? -Level-1 : Level-1][Run]; else Bits += 30; } int Dist = 0; for(int i=0; i<=Last; ++i) { const int q = Quant_Type ? ((Q*Inter_Matrix[Zigzag[i]]) >> 4) : Q; const int Mult = 2*q; const int Bias = (q-1) | 1; int V = C[Zigzag[i]]*Mult; if (V>0) V += Bias; else if (V<0) V -= Bias; V -= Ref[Zigzag[i]]; Dist += V*V; } SKL_UINT32 Cost = Lambda*Dist + (Bits<<16); if (DBG==1) printf( " Last:%2d/%2d Cost = [(Bits=%5.0d) + Lambda*(Dist=%6.0d) = %d ] >>12= %d ", Last,Max, Bits, Dist, Cost, Cost>>12 ); return Cost;#else return 0;#endif}#define TL(q) 0xfe00/(q*q)static const int Trellis_Lambda_Tabs[31] = { TL( 1),TL( 2),TL( 3),TL( 4),TL( 5),TL( 6), TL( 7), TL( 8),TL( 9),TL(10),TL(11),TL(12),TL(13),TL(14), TL(15), TL(16),TL(17),TL(18),TL(19),TL(20),TL(21),TL(22), TL(23), TL(24),TL(25),TL(26),TL(27),TL(28),TL(29),TL(30), TL(31)};#undef TLint SKL_MP4_ENC_I::Trellis_Quantize(SKL_INT16 * const Out, const int Q, const int * const Zigzag, int Non_Zero) const{ // Note: We should search last non-zero coeffs on *real* DCT input coeffs (In[]), // not quantized one (Out[]). However, it only improves the result *very* // slightly (~0.01dB), whereas speed drops to crawling level :) // Well, actually, taking 1 more coeff past Non_Zero into account sometimes helps, Non_Zero = Find_Last(Out, Zigzag, Non_Zero); if (Non_Zero<0) return -1; struct NODE { SKL_INT16 Run, Level; }; NODE Nodes[65], Last; SKL_UINT32 Run_Costs0[64+1], * const Run_Costs = Run_Costs0 + 1; const int Lambda = Trellis_Lambda_Tabs[Q-1]; // it's 1/lambda, actually int Run_Start = -1; Run_Costs[-1] = 2<<16; // source (w/ CBP penalty) SKL_UINT32 Min_Cost = 2<<16; int Last_Node = -1; SKL_UINT32 Last_Cost = 0;#if (DBG>0) Last.Level = 0; Last.Run = -1; // just initialize to smthg#endif int i, j; for(i=0; i<=Non_Zero; i++) { const int q = Quant_Type ? ((Q*Inter_Matrix[Zigzag[i]]) >> 4) : Q; const int Mult = 2*q; const int Bias = (q-1) | 1; const int Lev0 = Mult + Bias; const SKL_INT16 * const In = Out + 6*64; const int AC = In[Zigzag[i]]; const int Level1 = Out[Zigzag[i]]; const int Dist0 = Lambda* AC*AC; Last_Cost += Dist0; SKL_UINT32 Best_Cost; if (3U>(SKL_UINT32)(Level1+1)) // very specialized loop for -1,0,+1 { int dQ; if (AC<0) { Nodes[i].Level = -1; dQ = Lev0 + AC; } else { Nodes[i].Level = 1; dQ = Lev0 - AC; } const SKL_UINT32 Cost0 = Lambda*dQ*dQ; Nodes[i].Run = 1; Best_Cost = (Code_Len20[0]<<16) + Run_Costs[i-1]+Cost0; for(int Run=i-Run_Start; Run>0; --Run) { const SKL_UINT32 Cost_Base = Cost0 + Run_Costs[i-Run]; const SKL_UINT32 Cost = Cost_Base + (Code_Len20[Run-1]<<16); // TODO: what about tie-breaks? Should we favor short runs or // long runs? Although the error is the same, it would not be // spread the same way along high and low frequencies... if (Cost<Best_Cost) { Best_Cost = Cost; Nodes[i].Run = Run; } const SKL_UINT32 lCost = Cost_Base + (Code_Len24[Run-1]<<16); if (lCost<Last_Cost) { Last_Cost = lCost; Last.Run = Run; Last_Node = i; } } if (Last_Node==i) Last.Level = Nodes[i].Level; if (DBG==1) { Run_Costs[i] = Best_Cost; printf( "Costs #%2d: ", i); for(j=-1;j<=Non_Zero;++j) { if (j==Run_Start) printf( " %3.0d|", Run_Costs[j]>>12 ); else if (j>Run_Start && j<i) printf( " %3.0d|", Run_Costs[j]>>12 ); else if (j==i) printf( "(%3.0d)", Run_Costs[j]>>12 ); else printf( " - |" ); } printf( "<%3.0d %2d %d>", Min_Cost>>12, Nodes[i].Level, Nodes[i].Run ); printf( " Last:#%2d {%3.0d %2d %d}", Last_Node, Last_Cost>>12, Last.Level, Last.Run ); printf( " AC:%3.0d Dist0:%3d Dist(%d)=%d", AC, Dist0>>12, Nodes[i].Level, Cost0 ); printf( "\n" ); } } else if (51U>(SKL_UINT32)(Level1+25)) // "big" levels (not less than ESC3, though) { Best_Cost = 0xf0000000; const SKL_BYTE *Tbl_L1, *Tbl_L2, *Tbl_L1_Last, *Tbl_L2_Last; int Level2; int dQ1, dQ2; if (Level1>1) { dQ1 = Level1*Mult-AC + Bias; dQ2 = dQ1 - Mult; Level2 = Level1-1; Tbl_L1 = (Level1<=24) ? B16_17_Code_Len[Level1-1] : Code_Len0; Tbl_L2 = (Level2<=24) ? B16_17_Code_Len[Level2-1] : Code_Len0; Tbl_L1_Last = (Level1<=6) ? B16_17_Code_Len_Last[Level1-1] : Code_Len0; Tbl_L2_Last = (Level2<=6) ? B16_17_Code_Len_Last[Level2-1] : Code_Len0; } else { // Level1<-1 dQ1 = Level1*Mult-AC - Bias; dQ2 = dQ1 + Mult;
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -