?? motion_est_core_2_optimized.c
字號:
// to set pixel interpolation command , last=1 (it's last pixel interpolation command),
// Chroma= 1 (select chrominance (Cb & Cr) component)
// RADD =(((uint32_t) (REF_U + 32*8)) >> 2) (reference block start address)
pCodec->ME_command_queue0[64] = ((unsigned int)6<<29) | (((uint32_t) (REF_V + 32*8)) >> 2) | (1<<27) | (1<<28); // add unsigned int to avoid warning
//----------------------------------------------------------------------------
/************ for next block ********************************/
lam_16 = NEIGH_TEND_16X16*lambda_vec16[current1->quant];
// to set the ME Coefficient Register
// the MECR register is at address 0x10008.
SET_MECR(lam_16)
// to set the ME Command Queue start address Register
// the CMDADDR register is at address 0x10010.
SET_CMDADDR(ME_COMMAND_QUEUE_ADDR) //SET_CMDADDR(pCodec->ME_command_queue0)
// to set the ME Current and Result Block start address Register
// the MECADDR register is at address 0x10014.
SET_MECADDR(CUR_Y0)
// to set the ME Interpolation Block Start Address Register
// the MEIADDR register is at address 0x10024.
SET_MEIADDR(INTER_Y0)
// what's bit 11 doing??? not documented ...
X = current1->rounding_type << 2 | 1 << 11 | 1 | 1<<3 | 1<<4; // 1 << 11 => fcode
POLL_MARKER_S
while((pmdma->Status & 0x1) == 0) {} // check REFyuv, CURyuv OK
POLL_MARKER_E
// set the ME Control Register and begin ME engine
// MECTL register is at 0x10000
// the X variable is used to set the MECTL register, their correspond setting is as shown below :
// MBCNT_DIS=0 (not disable MB counter ??? ) , SKIP_PXI=0 (not skip PXI command for decoding...well..looks it is not used in encoder ???)
// VOP_START=1 (starts VOP layer processing)
// VPKT_START=1 (starts new Video Packet -- used for resync marker ...asked from James )
// RND= current->rounding_type (rounding control for pixel interpolation)
// PXI_1MV=0 (interpolation block size, used for decoding mode)
// ME_GO=1 (start ME operation)
SET_MECTL(X) // ME GO ************************
// since we have already prepared the secondary part of DMA commands right in the FrameCodeP(),
// so we can directly begin to execute the secondary part of DMA command buffer (from 0th to 0+40(0x28)-1=39th)
DMA_MOVE(DMA_COMMAND_QUEUE_STRIDE, 0x4B00028) // Enable DMA start transferring
// Enable chain transfer
// From sequential Local memory to sequential System memory
// transfer 0x28(40) words
//---------------------------------------------------------------------------
// let's correct some settings in primary part of DMA double command buffers
// because some initial DMA command settings in primary part of DMA command
// double buffer within FrameCodeP() were just used for moving
// the reference image once again. So we should correct those temporary
// settings to normal settings.
//---------------------------------------------------------------------------
// You know we disable this command chain for the purpose of moving another
// reference image again right before MotionEstimation_block0_4MV(),
// so we eanble the command chain again.
pDMA1[11] = 0x4A50000 | 48;
// let's set the primary of DMA command buffer for reference image location to the right address
// for DMA double command buffer purpose
// since we use DMA double buffer, so the increment for Y block of reference image is 512 bytes instead of 256 bytes
pDMA1[0] = ((uint32_t) pEnc->reference->reconstruct.y - (256*XDIM/16) + 768 | 0x07); // add offset 256*3=768
// since we use DMA double buffer, so the increment for U block of reference image is 128 bytes instead of 64 bytes
pDMA1[4] = ((uint32_t) pEnc->reference->reconstruct.u - (64*XDIM/16) + 192 | 0x05); // add offset 64*3=192
// since we use DMA double buffer, so the increment for V block of reference image is 128 bytes instead of 64 bytes
pDMA1[8] = ((uint32_t) pEnc->reference->reconstruct.v - (64*XDIM/16) + 192 | 0x05); // add offset 64*3=192
//---------------------------------------------------------------------------
// let's prepare the primary part of DMA double command buffer here
// for MotionEstimation_4MV() since you can see from the RTL simulation that
// the time that polling ME done take is quite wasteful for 4MV.
//---------------------------------------------------------------------------
pDMA1[1] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t)REF_Y + 48);
pDMA1[5] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t)REF_U + 24);
pDMA1[9] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t)REF_V + 24);
pDMA1[37] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(LOCAL_PREDICTOR0 + 0x80);
pDMA1[40] = pDMA2[36];
pDMA1[36] = (uint32_t) (pCodec->pred_value_phy + 32);
// make it a chain
pDMA1[39] = 0x04A42010; // make it gorup 2, to sync to MC done, since MC must be done before loading the ac dc predictor
// make it group ID 1, disable all these DMA commands
pDMA1[27] = 0x04B01040;
pDMA1[31] = 0x04B01010;
pDMA1[35] = 0x04B01010;
// set whether the ME is done or not
// CPSTS register is at address 0x10028
POLL_ME_DONE_MARKER_START
// CHECK ME IS DONE **************
do {
READ_CPSTS(X)
} while(!(X&0x01));
POLL_ME_DONE_MARKER_END
#ifdef DUMP_PMV_RESULT
if(1) {
int index_array[20]={ 19,20,21,22, 28,29,30,31, 37,38,39,40, 46,47,48,49, 55,56,57,58 };
int c;
int x_result[20],y_result[20],radd_result[20];
int32_t sad_value;
int32_t XX;
for(c=0;c<20;c++)
{
sad_value=pCodec->ME_command_queue0[index_array[c]];
if(sad_value & (1<<26))
{ // SRC==1
int src_index=(sad_value>>19)&0x7f;
XX = (pCodec->ME_command_queue0[src_index+(0)*4]>>12);
x_result[c]=(XX<<18)>>25;
y_result[c]=(XX<<25)>>25;
}
else
{ // SRC==0
XX = (pCodec->ME_command_queue0[index_array[c]]>>12);
x_result[c]=(XX<<18)>>25;
y_result[c]=(XX<<25)>>25;
}
radd_result[c]=sad_value&0x0fff;
}
for(c=0;c<20;c+=4)
{
#ifdef DUMP_WITH_RADDR
fprintf(pmv_result_file,"\n");
fprintf(pmv_result_file,"Left candidate : 0x%04x, 0x%04x (RADDR:0x%04x)\n",x_result[c]&0x07f,y_result[c]&0x07f,radd_result[c]);
fprintf(pmv_result_file,"Top candidate : 0x%04x, 0x%04x (RADDR:0x%04x)\n",x_result[c+1]&0x07f,y_result[c+1]&0x07f,radd_result[c+1]);
fprintf(pmv_result_file,"Top-Right candidate : 0x%04x, 0x%04x, (RADDR:0x%04x)\n",x_result[c+2]&0x07f,y_result[c+2]&0x07f,radd_result[c+2]);
fprintf(pmv_result_file,"Previous MB candidate : 0x%04x, 0x%04x, (RADDR:0x%04x)\n",x_result[c+3]&0x07f,y_result[c+3]&0x07f,radd_result[c+3]);
#else
fprintf(pmv_result_file,"\n");
fprintf(pmv_result_file,"Left candidate : 0x%04x, 0x%04x\n",x_result[c]&0x07f,y_result[c]&0x07f);
fprintf(pmv_result_file,"Top candidate : 0x%04x, 0x%04x\n",x_result[c+1]&0x07f,y_result[c+1]&0x07f);
fprintf(pmv_result_file,"Top-Right candidate : 0x%04x, 0x%04x\n",x_result[c+2]&0x07f,y_result[c+2]&0x07f);
fprintf(pmv_result_file,"Previous MB candidate : 0x%04x, 0x%04x\n",x_result[c+3]&0x07f,y_result[c+3]&0x07f);
#endif
#if 0
fprintf(pmv_result_file,"\n");
fprintf(pmv_result_file,"Left candidate : (RADDR:0x%04x)\n",radd_result[c]);
fprintf(pmv_result_file,"Top candidate : (RADDR:0x%04x)\n",radd_result[c+1]);
fprintf(pmv_result_file,"Top-Right candidate : (RADDR:0x%04x)\n",radd_result[c+2]);
fprintf(pmv_result_file,"Previous MB candidate : (RADDR:0x%04x)\n",radd_result[c+3]);
#endif
}
#if 0
fprintf(pmv_result_file,"\nFour motion vectors are :\n");
XX = (pCodec->ME_command_queue0[0]>>12);
fprintf(pmv_result_file,"0x%04x,0x%04x,",((XX<<18)>>25)& 0x7f,((XX<<25)>>25)&0x7f);
XX = (pCodec->ME_command_queue0[1]>>12);
fprintf(pmv_result_file,"0x%04x,0x%04x,",((XX<<18)>>25)& 0x7f,((XX<<25)>>25)&0x7f);
XX = (pCodec->ME_command_queue0[2]>>12);
fprintf(pmv_result_file,"0x%04x,0x%04x,",((XX<<18)>>25)& 0x7f,((XX<<25)>>25)&0x7f);
XX = (pCodec->ME_command_queue0[3]>>12);
fprintf(pmv_result_file,"0x%04x,0x%04x",((XX<<18)>>25)& 0x7f,((XX<<25)>>25)&0x7f);
fprintf(pmv_result_file,"\n");
#endif
}
#endif
// checking bit 7 field (ME_INTRA) of CPSTS register, which indicates
// the intra mode or inter mode after motion estimation. 1 means intra mode and 0 means inter mode.
// CPSTS register is at address 0x10028
d_type = (X & 0x80); // check intra/inter mode
if (d_type) {
pCodec->MB_mode = 0;
pMB->mode = MODE_INTRA;
pMB->mv16x_0 = 0;
pMB->mv16y_0 = 0;
pMB->mv16x_1 = 0;
pMB->mv16y_1 = 0;
pMB->mv16x_2 = 0;
pMB->mv16y_2 = 0;
pMB->mv16x_3 = 0;
pMB->mv16y_3 = 0;
} else {
pCodec->MB_mode = 1;
// again, checking bit 6 field (ME_BSIZE) of CPSTS register, which indicates
// the 1MV mode or 4MV mode after motion estimation. 1 means 1MV mode and 0 means 4MV mode.
// CPSTS register is at address 0x10028
if (X & 64) {
pMB->mode = MODE_INTER;
// it's 1MV mode now
// let's get motion vector result from MV0[3]
X = (pCodec->ME_command_queue0[3]);
// store back the motion vector to MV0[1] ??? why ???
pCodec->ME_command_queue0[1] = X;
X = X >> 12;
pMB->mv16x_0 = (X<<18)>>25;
pMB->mv16y_0 = (X<<25)>>25;
pMB->mv16x_1 = (X<<18)>>25;
pMB->mv16y_1 = (X<<25)>>25;
pMB->mv16x_2 = (X<<18)>>25;
pMB->mv16y_2 = (X<<25)>>25;
pMB->mv16x_3 = (X<<18)>>25;
pMB->mv16y_3 = (X<<25)>>25;
// Hmm.. it is used in bit 16 of MCCTL control register (at location 0x1001c) to indicate
// whether current motion vector is zero or not...
pCodec->MVZ = (X == 0);
} else {
pMB->mode = MODE_INTER4V;
X = (pCodec->ME_command_queue0[0]>>12);
pMB->mv16x_0 = (X<<18)>>25;
pMB->mv16y_0 = (X<<25)>>25;
// *vpe_stop = 0x93000000 | ((pMB->mv16x_0 & 0x7f)<<8) | (pMB->mv16y_0 & 0x7f);
X = (pCodec->ME_command_queue0[1]>>12);
pMB->mv16x_1 = (X<<18)>>25;
pMB->mv16y_1 = (X<<25)>>25;
// *vpe_stop = 0x93000000 | ((pMB->mv16x_1 & 0x7f)<<8) | (pMB->mv16y_1 & 0x7f);
X = (pCodec->ME_command_queue0[2]>>12);
pMB->mv16x_2 = (X<<18)>>25;
pMB->mv16y_2 = (X<<25)>>25;
// *vpe_stop = 0x93000000 | ((pMB->mv16x_2 & 0x7f)<<8) | (pMB->mv16y_2 & 0x7f);
X = (pCodec->ME_command_queue0[3]>>12);
pMB->mv16x_3 = (X<<18)>>25;
pMB->mv16y_3 = (X<<25)>>25;
// *vpe_stop = 0x93000000 | ((pMB->mv16x_3 & 0x7f)<<8) | (pMB->mv16y_3 & 0x7f);
// It is used in bit 16 of MCCTL control register (at location 0x1001c) to indicate
// whether current motion vector is zero or not...
pCodec->MVZ = 0;
}
}
#ifdef DUMP_PMV_RESULT
switch(pMB->mode)
{
case MODE_INTRA:
fprintf(pmv_result_file," Mode is Intra mode\n\n");
break;
case MODE_INTER:
fprintf(pmv_result_file," (1MV) MVD are : 0x%04x, 0x%04x\n",((pCodec->ME_command_queue0[11]>>16)&0x0ffff),(pCodec->ME_command_queue0[11]&0x0ffff));
fprintf(pmv_result_file," Mode is Inter 1MV mode\n\n");
break;
case MODE_INTER4V:
fprintf(pmv_result_file," (4MV) MVD are : 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x\n",((pCodec->ME_command_queue0[8]>>16)&0x0ffff), (pCodec->ME_command_queue0[8]&0x0ffff), ((pCodec->ME_command_queue0[9]>>16)&0x0ffff), (pCodec->ME_command_queue0[9]&0x0ffff), ((pCodec->ME_command_queue0[10]>>16)&0x0ffff), (pCodec->ME_command_queue0[10]&0x0ffff), ((pCodec->ME_command_queue0[11]>>16)&0x0ffff), (pCodec->ME_command_queue0[11]&0x0ffff));
fprintf(pmv_result_file," Mode is Inter 4MV mode\n\n");
break;
}
#endif
//#ifdef DUMP_ME_RESULT
//READ_MIN_SADMV(pMB->sad16)
//#endif
/* setup for next ME */
prevMB = &reference->mbs[1];
MOTION_ACTIVITY=abs(pMB->mv16x_1)+abs(pMB->mv16y_1);
d_type=(MOTION_ACTIVITY > L1); // if (MOTION_ACTIVITY > L1) => Large Diamond else Small Diamond
pCodec->ME_command_queue0[19] = (2<<29) | ((pMB->mv16x_1 & 0x7f) << 19) | ((pMB->mv16y_1 & 0x7f) << 12) | pCodec->Raddr + (pMB->mv16y_1 < 0 ? 0 : (pMB->mv16y_1 >> 1)*16);
pCodec->ME_command_queue0[22] = (1<<28) | (2<<29) | ((prevMB->mv16x_3 & 0x7f) << 19) | ((prevMB->mv16y_3 & 0x7f) << 12)
| pCodec->Raddr + (prevMB->mv16y_3 < 0 ? 0 : (prevMB->mv16y_3 >> 1)*16);
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -