?? motion_est_core_2_optimized.c
字號:
pMB->mv16y_0 = (X<<25)>>25;
pMB->mv16x_1 = pMB->mv16x_0;
pMB->mv16y_1 = pMB->mv16y_0;
pMB->mv16x_2 = pMB->mv16x_0;
pMB->mv16y_2 = pMB->mv16y_0;
pMB->mv16x_3 = pMB->mv16x_0;
pMB->mv16y_3 = pMB->mv16y_0;
pCodec->MVZ = (X == 0);
} else {
pMB->mode = MODE_INTER4V;
X = (pCodec->ME_command_queue0[0+(pCodec->even_odd_1)*4]>>12);
pMB->mv16x_0 = (X<<18)>>25;
pMB->mv16y_0 = (X<<25)>>25;
// *vpe_stop = 0x93000000 | ((pMB->mv16x_0 & 0x7f)<<8) | (pMB->mv16y_0 & 0x7f);
X = (pCodec->ME_command_queue0[1+(pCodec->even_odd_1)*4]>>12);
pMB->mv16x_1 = (X<<18)>>25;
pMB->mv16y_1 = (X<<25)>>25;
// *vpe_stop = 0x93000000 | ((pMB->mv16x_1 & 0x7f)<<8) | (pMB->mv16y_1 & 0x7f);
X = (pCodec->ME_command_queue0[2+(pCodec->even_odd_1)*4]>>12);
pMB->mv16x_2 = (X<<18)>>25;
pMB->mv16y_2 = (X<<25)>>25;
// *vpe_stop = 0x93000000 | ((pMB->mv16x_2 & 0x7f)<<8) | (pMB->mv16y_2 & 0x7f);
X = (pCodec->ME_command_queue0[3+(pCodec->even_odd_1)*4]>>12);
pMB->mv16x_3 = (X<<18)>>25;
pMB->mv16y_3 = (X<<25)>>25;
// *vpe_stop = 0x93000000 | ((pMB->mv16x_3 & 0x7f)<<8) | (pMB->mv16y_3 & 0x7f);
pCodec->MVZ = 0;
}
}
#ifdef DUMP_PMV_RESULT
switch(pMB->mode)
{
case MODE_INTRA:
fprintf(pmv_result_file," Mode is Intra mode\n\n");
break;
case MODE_INTER:
fprintf(pmv_result_file," (1MV) MVD are : 0x%04x, 0x%04x\n",((pCodec->ME_command_queue0[11]>>16)&0x0ffff), (pCodec->ME_command_queue0[11]&0x0ffff));
fprintf(pmv_result_file," Mode is Inter 1MV mode\n\n");
break;
case MODE_INTER4V:
fprintf(pmv_result_file," (4MV) MVD are : 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x\n",((pCodec->ME_command_queue0[8]>>16)&0x0ffff), (pCodec->ME_command_queue0[8]&0x0ffff), ((pCodec->ME_command_queue0[9]>>16)&0x0ffff), (pCodec->ME_command_queue0[9]&0x0ffff), ((pCodec->ME_command_queue0[10]>>16)&0x0ffff), (pCodec->ME_command_queue0[10]&0x0ffff), ((pCodec->ME_command_queue0[11]>>16)&0x0ffff), (pCodec->ME_command_queue0[11]&0x0ffff));
fprintf(pmv_result_file," Mode is Inter 4MV mode\n\n");
break;
}
#endif
tmp1 = pMB->mv16x_1;
tmp2 = pMB->mv16y_1;
tmp3 = pMB->mv16x_3;
tmp4 = pMB->mv16y_3;
get_pmvdata1_4MV(x,iWcount, &tmp1, &tmp2, &tmp3, &tmp4,pParam,pCodec);
if (!d_type) {
MOTION_ACTIVITY=abs(tmp1)+abs(tmp2);
d_type=(MOTION_ACTIVITY > L1); // if (MOTION_ACTIVITY > L1) => Large Diamond else Small Diamond
}
pCodec->ME_command_queue0[19] = (2<<29) | ((tmp1 & 0x7f) << 19) | ((tmp2 & 0x7f) << 12) | (pCodec->Raddr + (tmp2 >> 1)*16);
pCodec->ME_command_queue0[20] = (2<<29) | ((pmv[1].x & 0x7f) << 19) | ((pmv[1].y & 0x7f) << 12) | (pCodec->Raddr + (pmv[1].y >> 1)*16);
pCodec->ME_command_queue0[21] = (2<<29) | ((pmv[3].x & 0x7f) << 19) | ((pmv[3].y & 0x7f) << 12) | (pCodec->Raddr + (pmv[3].y >> 1)*16);
pCodec->ME_command_queue0[22] = (2<<29) | (1<<28) | ((prevMB->mv16x_3 & 0x7f) << 19) | ((prevMB->mv16y_3 & 0x7f) << 12) | (pCodec->Raddr + (prevMB->mv16y_3 >> 1)*16);
pCodec->ME_command_queue0[23] = (3<<29) | (d_type << 24) | (Diamond_search_limit << 16) | ThEES; // dsize is always small
pCodec->ME_command_queue0[28] = (2<<29) | ((tmp1 & 0x7f) << 19) | ((tmp2 & 0x7f) << 12) | (pCodec->Raddr + (tmp2 >> 1)*16);
pCodec->ME_command_queue0[29] = (2<<29) | ((pmv[1].x & 0x7f) << 19) | ((pmv[1].y & 0x7f) << 12) | (pCodec->Raddr + (pmv[1].y >> 1)*16);
pCodec->ME_command_queue0[30] = (2<<29) | ((pmv[3].x & 0x7f) << 19) | ((pmv[3].y & 0x7f) << 12) | (pCodec->Raddr + (pmv[3].y >> 1)*16);
pCodec->ME_command_queue0[31] = (1<<28) | (2<<29) | ((prevMB->mv16x_0 & 0x7f) << 19) | ((prevMB->mv16y_0 & 0x7f) << 12) | (pCodec->Raddr + (prevMB->mv16y_0 >> 1)*16);
pCodec->ME_command_queue0[38] = (2<<29) | ((pmv[2].x & 0x7f) << 19) | ((pmv[2].y & 0x7f) << 12) | (pCodec->Raddr + (pmv[2].y >> 1)*16);
pCodec->ME_command_queue0[39] = (2<<29) | ((pmv[3].x & 0x7f) << 19) | ((pmv[3].y & 0x7f) << 12) | (pCodec->Raddr + (pmv[3].y >> 1)*16);
pCodec->ME_command_queue0[40] = (1<<28) | (2<<29) | ((prevMB->mv16x_1 & 0x7f) << 19) | ((prevMB->mv16y_1 & 0x7f) << 12) | (pCodec->Raddr + (prevMB->mv16y_1 >> 1)*16);
pCodec->ME_command_queue0[46] = (2<<29) | ((tmp3 & 0x7f)<<19) | ((tmp4 & 0x7f)<<12) | (pCodec->Raddr23 + (tmp4 >> 1)*16);
pCodec->ME_command_queue0[49] = (1<<28) | (2<<29) | ((prevMB->mv16x_2 & 0x7f) << 19) | ((prevMB->mv16y_2 & 0x7f) << 12) | (pCodec->Raddr23 + (prevMB->mv16y_2 >> 1)*16);
pCodec->ME_command_queue0[58] = (1<<28) | (2<<29) | ((prevMB->mv16x_3 & 0x7f) << 19) | ((prevMB->mv16y_3 & 0x7f) << 12) | (pCodec->Raddr23 + (prevMB->mv16y_3 >> 1)*16);
if ((y==(pParam->mb_height-1)) | ((y==(pParam->mb_height-2)) & (x==(iWcount-1)))) {
if (prevMB->mv16y_3 > 15)
pCodec->ME_command_queue0[58] = pCodec->ME_command_queue0[58] - ((prevMB->mv16y_3 >> 1)*16) + ((15>>1) * 16);
if (prevMB->mv16y_2 > 15)
pCodec->ME_command_queue0[49] = pCodec->ME_command_queue0[49] - ((prevMB->mv16y_2 >> 1)*16) + ((15>>1) * 16);
if (tmp4 > 15)
pCodec->ME_command_queue0[46] = pCodec->ME_command_queue0[46] - ((tmp4 >> 1)*16) + ((15>>1) * 16);
}
if (counter < iWcount) {
if (tmp2 < 0)
pCodec->ME_command_queue0[19] -= (tmp2 >> 1)*16;
if (pmv[1].y < 0) {
pCodec->ME_command_queue0[29] -= (pmv[1].y >> 1)*16;
pCodec->ME_command_queue0[20] -= (pmv[1].y >> 1)*16;
}
if (pmv[3].y < 0) {
pCodec->ME_command_queue0[21] -= (pmv[3].y >> 1)*16;
pCodec->ME_command_queue0[30] -= (pmv[3].y >> 1)*16;
pCodec->ME_command_queue0[39] -= (pmv[3].y >> 1)*16;
}
if (pmv[2].y < 0)
pCodec->ME_command_queue0[38] -= (pmv[2].y >> 1)*16;
if (prevMB->mv16y_0 < 0)
pCodec->ME_command_queue0[31] -= (prevMB->mv16y_0 >> 1)*16;
if (prevMB->mv16y_1 < 0)
pCodec->ME_command_queue0[40] -= (prevMB->mv16y_1 >> 1)*16;
if (prevMB->mv16y_2 < -16)
pCodec->ME_command_queue0[49] -= ((prevMB->mv16y_2 + 16) >> 1)*16;
if (prevMB->mv16y_3 < 0)
pCodec->ME_command_queue0[22] -= (prevMB->mv16y_3 >> 1)*16;
if (prevMB->mv16y_3 < -16)
pCodec->ME_command_queue0[58] -= ((prevMB->mv16y_3 + 16) >> 1)*16;
if (tmp2 < 0)
pCodec->ME_command_queue0[28] -= (tmp2 >> 1)*16;
if (tmp4 < -16)
pCodec->ME_command_queue0[46] -= ((tmp4 + 16) >> 1)*16;
}
return 0;
}
/*
 * MotionEstimation_blocklast_4MV
 *
 * Sequence performed by this routine, in order:
 *   1. busy-wait until bit 0 of MDMA1->Status is set;
 *   2. run the transform/quantisation helper for the macroblock
 *      (MBTransQuantInter when pCodec->MB_mode is non-zero, otherwise
 *      MBTransQuantIntra_p after filling pCodec->acdc_status);
 *   3. hand the resulting word to SET_MCCTL;
 *   4. issue a DMA_MOVE and patch entries 25/29/33/35 of one DMA command queue;
 *   5. busy-wait until bit 1 of the value read by READ_CPSTS is set;
 *   6. busy-wait on MDMA1->Status bit 0 again and issue a final DMA_MOVE.
 *
 * Parameters:
 *   pMB_mc - macroblock entry; its mode is packed into the control word on the
 *            inter path, and entries stored before it are inspected on the
 *            intra path.
 *   pParam - forwarded to the MBTransQuant* helpers; pParam->resyn selects how
 *            many neighbouring entries are inspected on the intra path.
 *   pEnc   - encoder context; supplies pCodec, current1 and the frame width.
 *
 * Returns: always 0.
 *
 * NOTE(review): none of the polling loops has a timeout, so the routine spins
 * for as long as the polled bits stay clear.
 * NOTE(review): the intra path reads pMB_mc[-1] and entries XDIM/16 further
 * back without any bounds check -- presumably the caller guarantees those
 * entries exist; confirm against the call site.
 */
int32_t
MotionEstimation_blocklast_4MV(MACROBLOCK *const pMB_mc,
MBParam * const pParam,
Encoder * pEnc)
{
FTMCP100_CODEC *pCodec=(FTMCP100_CODEC *)pEnc->pCodec;
// control word handed to SET_MCCTL below
uint32_t index;
// receives the value read by READ_CPSTS in the MC-done poll
int32_t X;
volatile MDMA *pmdma = MDMA1;
// cursor used to look at macroblock entries stored before pMB_mc
MACROBLOCK *pMB_tmp;
FRAMEINFO * const current1=pEnc->current1;
// frame width; XDIM/16 is used below as an offset into the macroblock array
unsigned int XDIM=pEnc->mEncParam.u32FrameWidth;
DECLARE_MP4_PTR
POLL_MARKER_S
// because we have made the DMA chained and sync to MC done.
// so checking DMA done is equivalent to checking MC done
while((pmdma->Status & 0x1) == 0) {}
POLL_MARKER_E
if (pCodec->MB_mode) {
// Non-zero MB_mode: OR the MBTransQuantInter result with MVZ at bit 16
// and the macroblock mode at bit 6.
index = (MBTransQuantInter(pParam, current1,pCodec)) | (pCodec->MVZ << 16) | (pMB_mc->mode<<6);
} else {
// Zero MB_mode: build acdc_status from the modes of nearby entries, then
// call MBTransQuantIntra_p. A bit is set when the inspected entry is NOT
// MODE_INTRA.
if (pParam->resyn==0)
{
pMB_tmp = pMB_mc;
pCodec->acdc_status = 0;
// bit 0 (value 1): entry immediately before pMB_mc
// (presumably the left neighbour -- TODO confirm array layout)
pMB_tmp = pMB_tmp - 1;
if (pMB_tmp->mode != MODE_INTRA)
pCodec->acdc_status |= 1;
// bit 2 (value 4): entry a further XDIM/16 back, i.e. pMB_mc - 1 - XDIM/16
// (presumably the above-left neighbour if the array is row-major with
// 16-pixel-wide macroblocks -- TODO confirm)
pMB_tmp = pMB_tmp - (XDIM/16);
if (pMB_tmp->mode != MODE_INTRA)
pCodec->acdc_status |= 4;
// bit 1 (value 2): entry at pMB_mc - XDIM/16
// (presumably the neighbour directly above -- TODO confirm)
pMB_tmp += 1;
if (pMB_tmp->mode != MODE_INTRA)
pCodec->acdc_status |= 2;
}
else
{
// resyn != 0: bits 1 and 2 are set unconditionally (value 6) without
// reading the corresponding entries; only pMB_mc[-1] is inspected.
pCodec->acdc_status = 6;
pMB_tmp = pMB_mc - 1;
if (pMB_tmp->mode != MODE_INTRA)
pCodec->acdc_status |= 1;
}
index = MBTransQuantIntra_p(pParam, current1,pCodec);
}
SET_MCCTL(index) // MC GO
{
unsigned int *pDMA_next;
// pDMA_next addresses the command queue selected by even_odd_1; the
// DMA_MOVE below is given the offset computed from (even_odd_1^1) instead
// (the other queue, assuming even_odd_1 is 0 or 1 -- TODO confirm).
pDMA_next=pCodec->DMA_COMMAND_local+(pCodec->even_odd_1)*DMA_COMMAND_QUEUE_STRIDE;
DMA_MOVE(24+(pCodec->even_odd_1^1)*DMA_COMMAND_QUEUE_STRIDE, 0x4B00014)
// Point entries 25/29/33 at the Y/U/V areas of buffer index
// (triple_buffer_selector+2)%3, each buffer being 384 units apart.
pDMA_next[25] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(((pCodec->triple_buffer_selector+2)%3)*384 + INTER_Y0);
pDMA_next[29] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(((pCodec->triple_buffer_selector+2)%3)*384 + INTER_U0);
pDMA_next[33] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(((pCodec->triple_buffer_selector+2)%3)*384 + INTER_V0);
// NOTE(review): meaning of this control word is not visible here -- confirm
// against the DMA controller documentation.
pDMA_next[35]=0x00900010;
}
/****************** CHECK MC_DONE, INT Y OK ****************/
POLL_MC_DONE_MARKER_START
// check MC done
// Spin until bit 1 (mask 0x02) of the value read by READ_CPSTS is set.
do {
READ_CPSTS(X)
} while(!(X&0x02));
POLL_MC_DONE_MARKER_END
POLL_MARKER_S
// because we have made the DMA chained and sync to MC done.
// so checking DMA done is equivalent to checking MC done
while((pmdma->Status & 0x1) == 0) {}
POLL_MARKER_E
// Issue the final DMA_MOVE at offset 24 of the queue selected by even_odd_1
// (the queue whose entries were patched above).
DMA_MOVE(24+(pCodec->even_odd_1)*DMA_COMMAND_QUEUE_STRIDE, 0x4B0000c)
return 0;
}
int32_t
MotionEstimation_block0_1MV(MACROBLOCK *const pMB,
Encoder *pEnc)
{
FTMCP100_CODEC *pCodec=(FTMCP100_CODEC *)pEnc->pCodec;
FRAMEINFO * const current1=pEnc->current1;
FRAMEINFO * const reference=pEnc->reference;
MACROBLOCK *prevMB = &reference->mbs[0];
VECTOR pmv[4];
int32_t MOTION_ACTIVITY;
int32_t X, d_type;
uint32_t lam_16;
int32_t pmvsx, pmvsy;
volatile MDMA *pmdma = MDMA1;
DECLARE_MP4_PTR
unsigned int *pDMA1,*pDMA2;
unsigned int XDIM=pEnc->mEncParam.u32FrameWidth;
pDMA1=pCodec->DMA_COMMAND_local;
pDMA2=pCodec->DMA_COMMAND_local+DMA_COMMAND_QUEUE_STRIDE;
SET_VOPSIZE((pEnc->mEncParam.u32FrameWidth)<<16 | (pEnc->mEncParam.u32FrameHeight))
SET_PMVBUF(PMV_BUF)
SET_HOFFSET(0)
pCodec->ME_command_queue0[16] = (0<<29) | (1<<25) | (0<<23);
pCodec->ME_command_queue0[17] = (1<<29) | (1<<25) | (0<<23);
pCodec->ME_command_queue0[18] = (2<<29) | (1<<27) | pCodec->Raddr; // median result
pCodec->ME_command_queue0[19] = (2<<29) | pCodec->Raddr;
pCodec->ME_command_queue0[20] = (2<<29) | pCodec->Raddr;
pCodec->ME_command_queue0[21] = (2<<29) | pCodec->Raddr;
pCodec->ME_command_queue0[22] = (1<<28) | (2<<29) | ((prevMB->mv16x_3 & 0x7f) << 19) | ((prevMB->mv16y_3 & 0x7f) << 12)
| pCodec->Raddr + (prevMB->mv16y_3 < 0 ? 0 : (prevMB->mv16y_3 >> 1)*16);
pCodec->ME_command_queue0[23] = (3<<29) | (Diamond_search_limit << 16) | ThEES; // dsize is always small
pCodec->ME_command_queue0[24] = ((unsigned int)4<<29) | ThEES1; // to add unsigned int to avoid warning ... half pixel refine
pCodec->ME_command_queue0[25] = ((unsigned int)5<<29) | (1<<28) | MV16_INTER_BIAS; // to add unsigned int to avoid warning ... P-I switch
pCodec->ME_command_queue0[26] = ((unsigned int)6<<29); // to add unsigned int to avoid warning ... Y interpolation
pCodec->ME_command_queue0[27] = ((unsigned int)6<<29) | (((uint32_t) (REF_U + 32*8)) >> 2) | (1<<27); // to add unsigned int to avoid warning
pCodec->ME_command_queue0[28] = ((unsigned int)6<<29) | (((uint32_t) (REF_V + 32*8)) >> 2) | (1<<27) | (1<<28); // to add unsigned int to avoid warning
/************ for next block ********************************/
lam_16 = NEIGH_TEND_16X16*lambda_vec16[current1->quant];
SET_MECR(lam_16)
SET_CMDADDR(ME_COMMAND_QUEUE_ADDR)
SET_MECADDR(CUR_Y0)
SET_MEIADDR(INTER_Y0)
X = current1->rounding_type << 2 | 1 << 11 | 1 | 1<<3 | 1<<4; // 1 << 11 => fcode
POLL_MARKER_S
while((pmdma->Status & 0x1) == 0) {} // check REFyuv, CURyuv OK
POLL_MARKER_E
SET_MECTL(X) // ME GO ************************
// since we have already prepared the secondary part of DMA commands right in the FrameCodeP(),
// so we can directly begin to execute the secondary part of DMA command buffer (from 0th to 0+40(0x28)-1=39th)
DMA_MOVE(DMA_COMMAND_QUEUE_STRIDE, 0x4B00028) // enable Transfer Done flag mask
// Enable DMA start transferring
// Enable chain transfer
// From sequential Local memory to sequential System memory
// transfer 0x28(40) words
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -