?? me.c
字號:
static const int x264_pixel_size_shift[7] = { 0, 1, 1, 2, 3, 3, 4 }; int ucost1, ucost2; int cross_start = 1; /* refine predictors */ ucost1 = bcost; DIA1_ITER( pmx, pmy ); if( pmx || pmy ) DIA1_ITER( 0, 0 ); if(i_pixel == PIXEL_4x4) goto me_hex2; ucost2 = bcost; if( (bmx || bmy) && (bmx!=pmx || bmy!=pmy) ) DIA1_ITER( bmx, bmy ); if( bcost == ucost2 ) cross_start = 3; omx = bmx; omy = bmy; /* early termination */#define SAD_THRESH(v) ( bcost < ( v >> x264_pixel_size_shift[i_pixel] ) ) if( bcost == ucost2 && SAD_THRESH(2000) ) { COST_MV_X4( 0,-2, -1,-1, 1,-1, -2,0 ); COST_MV_X4( 2, 0, -1, 1, 1, 1, 0,2 ); if( bcost == ucost1 && SAD_THRESH(500) ) break; if( bcost == ucost2 ) { int range = (i_me_range>>1) | 1; CROSS( 3, range, range ); COST_MV_X4( -1,-2, 1,-2, -2,-1, 2,-1 ); COST_MV_X4( -2, 1, 2, 1, -1, 2, 1, 2 ); if( bcost == ucost2 ) break; cross_start = range + 2; } } /* adaptive search range */ if( i_mvc ) { /* range multipliers based on casual inspection of some statistics of * average distance between current predictor and final mv found by ESA. * these have not been tuned much by actual encoding. */ static const int range_mul[4][4] = { { 3, 3, 4, 4 }, { 3, 4, 4, 4 }, { 4, 4, 4, 5 }, { 4, 4, 5, 6 }, }; int mvd; int sad_ctx, mvd_ctx; if( i_mvc == 1 ) { if( i_pixel == PIXEL_16x16 ) /* mvc is probably the same as mvp, so the difference isn't meaningful. * but prediction usually isn't too bad, so just use medium range */ mvd = 25; else mvd = abs( m->mvp[0] - mvc[0][0] ) + abs( m->mvp[1] - mvc[0][1] ); } else { /* calculate the degree of agreement between predictors. */ /* in 16x16, mvc includes all the neighbors used to make mvp, * so don't count mvp separately. */ int i_denom = i_mvc - 1; mvd = 0; if( i_pixel != PIXEL_16x16 ) { mvd = abs( m->mvp[0] - mvc[0][0] ) + abs( m->mvp[1] - mvc[0][1] ); i_denom++; } for( i = 0; i < i_mvc-1; i++ ) mvd += abs( mvc[i][0] - mvc[i+1][0] ) + abs( mvc[i][1] - mvc[i+1][1] ); mvd /= i_denom; //FIXME idiv } sad_ctx = SAD_THRESH(1000) ? 0 : SAD_THRESH(2000) ? 1 : SAD_THRESH(4000) ? 2 : 3; mvd_ctx = mvd < 10 ? 0 : mvd < 20 ? 1 : mvd < 40 ? 2 : 3; i_me_range = i_me_range * range_mul[mvd_ctx][sad_ctx] / 4; } /* FIXME if the above DIA2/OCT2/CROSS found a new mv, it has not updated omx/omy. * we are still centered on the same place as the DIA2. is this desirable? */ CROSS( cross_start, i_me_range, i_me_range/2 ); /* 5x5 ESA */ omx = bmx; omy = bmy; if( bcost != ucost2 ) COST_MV_X4( 1, 0, 0, 1, -1, 0, 0,-1 ); COST_MV_X4( 1, 1, -1, 1, -1,-1, 1,-1 ); COST_MV_X4( 2,-1, 2, 0, 2, 1, 2, 2 ); COST_MV_X4( 1, 2, 0, 2, -1, 2, -2, 2 ); COST_MV_X4( -2, 1, -2, 0, -2,-1, -2,-2 ); COST_MV_X4( -1,-2, 0,-2, 1,-2, 2,-2 ); /* hexagon grid */ omx = bmx; omy = bmy; for( i = 1; i <= i_me_range/4; i++ ) { static const int hex4[16][2] = { {-4, 2}, {-4, 1}, {-4, 0}, {-4,-1}, {-4,-2}, { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2}, { 2, 3}, { 0, 4}, {-2, 3}, {-2,-3}, { 0,-4}, { 2,-3}, }; if( 4*i > X264_MIN4( mv_x_max-omx, omx-mv_x_min, mv_y_max-omy, omy-mv_y_min ) ) { for( j = 0; j < 16; j++ ) { int mx = omx + hex4[j][0]*i; int my = omy + hex4[j][1]*i; if( mx >= mv_x_min && mx <= mv_x_max && my >= mv_y_min && my <= mv_y_max ) COST_MV( mx, my ); } } else { COST_MV_X4( -4*i, 2*i, -4*i, 1*i, -4*i, 0*i, -4*i,-1*i ); COST_MV_X4( -4*i,-2*i, 4*i,-2*i, 4*i,-1*i, 4*i, 0*i ); COST_MV_X4( 4*i, 1*i, 4*i, 2*i, 2*i, 3*i, 0*i, 4*i ); COST_MV_X4( -2*i, 3*i, -2*i,-3*i, 0*i,-4*i, 2*i,-3*i ); } } goto me_hex2; } case X264_ME_ESA: { const int min_x = X264_MAX( bmx - i_me_range, mv_x_min); const int min_y = X264_MAX( bmy - i_me_range, mv_y_min); const int max_x = X264_MIN( bmx + i_me_range, mv_x_max); const int max_y = X264_MIN( bmy + i_me_range, mv_y_max); int mx, my;#if 0 /* plain old exhaustive search */ for( my = min_y; my <= max_y; my++ ) for( mx = min_x; mx <= max_x; mx++ ) COST_MV( mx, my );#else /* successive elimination by comparing DC before a full SAD, * because sum(abs(diff)) >= abs(diff(sum)). */ const int stride = m->i_stride[0]; const int dw = x264_pixel_size[i_pixel].w; const int dh = x264_pixel_size[i_pixel].h * stride; static uint8_t zero[16*16] = {0,}; const int enc_dc = h->pixf.sad[i_pixel]( m->p_fenc[0], FENC_STRIDE, zero, 16 ); const uint16_t *integral_base = &m->integral[ -1 - 1*stride ]; for( my = min_y; my <= max_y; my++ ) { int mvs[3], i_mvs=0; for( mx = min_x; mx <= max_x; mx++ ) { const uint16_t *integral = &integral_base[ mx + my * stride ]; const uint16_t ref_dc = integral[ 0 ] + integral[ dh + dw ] - integral[ dw ] - integral[ dh ]; const int bsad = bcost - BITS_MVD(mx,my); if( abs( ref_dc - enc_dc ) < bsad ) { if( i_mvs == 3 ) { COST_MV_X4_ABS( mvs[0],my, mvs[1],my, mvs[2],my, mx,my ); i_mvs = 0; } else mvs[i_mvs++] = mx; } } for( i=0; i<i_mvs; i++ ) COST_MV( mvs[i], my ); }#endif } break; } /* -> qpel mv */ if( bpred_cost < bcost ) { m->mv[0] = bpred_mx; m->mv[1] = bpred_my; m->cost = bpred_cost; } else { m->mv[0] = bmx << 2; m->mv[1] = bmy << 2; m->cost = bcost; } /* compute the real cost */ m->cost_mv = p_cost_mvx[ m->mv[0] ] + p_cost_mvy[ m->mv[1] ]; if( bmx == pmx && bmy == pmy && h->mb.i_subpel_refine < 3 ) m->cost += m->cost_mv; /* subpel refine */ if( h->mb.i_subpel_refine >= 2 ) { int hpel = subpel_iterations[h->mb.i_subpel_refine][2]; int qpel = subpel_iterations[h->mb.i_subpel_refine][3]; refine_subpel( h, m, hpel, qpel, p_halfpel_thresh, 0 ); }}#undef COST_MVvoid x264_me_refine_qpel( x264_t *h, x264_me_t *m ){ int hpel = subpel_iterations[h->mb.i_subpel_refine][0]; int qpel = subpel_iterations[h->mb.i_subpel_refine][1]; if( m->i_pixel <= PIXEL_8x8 && h->sh.i_type == SLICE_TYPE_P ) m->cost -= m->i_ref_cost; refine_subpel( h, m, hpel, qpel, NULL, 1 );}#define COST_MV_SAD( mx, my ) \{ \ int stride = 16; \ uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[0], &stride, mx, my, bw, bh ); \ int cost = h->pixf.sad[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \ + p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \ COPY3_IF_LT( bcost, cost, bmx, mx, bmy, my ); \}#define COST_MV_SATD( mx, my, dir ) \if( b_refine_qpel || (dir^1) != odir ) \{ \ int stride = 16; \ uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix[0], &stride, mx, my, bw, bh ); \ int cost = h->pixf.mbcmp[i_pixel]( m->p_fenc[0], FENC_STRIDE, src, stride ) \ + p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \ if( b_chroma_me && cost < bcost ) \ { \ h->mc.mc_chroma( m->p_fref[4], m->i_stride[1], pix[0], 8, mx, my, bw/2, bh/2 ); \ cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[1], FENC_STRIDE, pix[0], 8 ); \ if( cost < bcost ) \ { \ h->mc.mc_chroma( m->p_fref[5], m->i_stride[1], pix[0], 8, mx, my, bw/2, bh/2 ); \ cost += h->pixf.mbcmp[i_pixel+3]( m->p_fenc[2], FENC_STRIDE, pix[0], 8 ); \ } \ } \ if( cost < bcost ) \ { \ bcost = cost; \ bmx = mx; \ bmy = my; \ bdir = dir; \ } \}static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters, int *p_halfpel_thresh, int b_refine_qpel ){ const int bw = x264_pixel_size[m->i_pixel].w; const int bh = x264_pixel_size[m->i_pixel].h; const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0]; const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1]; const int i_pixel = m->i_pixel; const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8; DECLARE_ALIGNED( uint8_t, pix[4][16*16], 16 ); int omx, omy; int i; int bmx = m->mv[0]; int bmy = m->mv[1]; int bcost = m->cost; int odir = -1, bdir; /* try the subpel component of the predicted mv */ if( hpel_iters && h->mb.i_subpel_refine < 3 ) { int mx = x264_clip3( m->mvp[0], h->mb.mv_min_spel[0], h->mb.mv_max_spel[0] ); int my = x264_clip3( m->mvp[1], h->mb.mv_min_spel[1], h->mb.mv_max_spel[1] ); if( mx != bmx || my != bmy ) COST_MV_SAD( mx, my ); } /* halfpel diamond search */ for( i = hpel_iters; i > 0; i-- ) { int omx = bmx, omy = bmy; int costs[4];
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -