?? encoder.c
字號:
//pCodec->DMA_COMMAND_local[34] = (uint32_t) 0;
pCodec->DMA_COMMAND_local[35] = (uint32_t) 0x4B01010; // make it group ID 1, disable this command
//pCodec->DMA_COMMAND_local[38+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) (8+1-4) << 24 | 4 << 20;
pCodec->DMA_COMMAND_local[39+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0x840010;
//pCodec->DMA_COMMAND_local[26+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0;
pCodec->DMA_COMMAND_local[27+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0x4B01040; // make it group ID 1, disable this command
//pCodec->DMA_COMMAND_local[30+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0;
pCodec->DMA_COMMAND_local[31+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0x4B01010; // make it group ID 1, disable this command
//pCodec->DMA_COMMAND_local[34+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0;
pCodec->DMA_COMMAND_local[35+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0x4B01010; // make it group ID 1, disable this command
// set the related DMA commands to move the reference block to local memory
// since we use DMA double buffer, but for the very first time, we will move the reference image to
// local memory twice by using the upper part of DMA double buffer
// so, let's increment it for just 256 bytes
pCodec->DMA_COMMAND_local[0] = ((uint32_t) pEnc->reference->reconstruct.y - (256*XDIM/16) | 0x06);
// since we use DMA double buffer, so the increment for Y block of reference image is 512 bytes instead of 256 bytes
pCodec->DMA_COMMAND_local[0+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->reference->reconstruct.y - (256*XDIM/16) + 512 | 0x07);
pCodec->DMA_COMMAND_local[1] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(REF_Y);
pCodec->DMA_COMMAND_local[1+DMA_COMMAND_QUEUE_STRIDE] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t)REF_Y+32);
// local memory frame width is 16 words, and block width is 4 words.
// system memory frame width is (XDIM*4) words and block width is 32 words(128 bytes).
pCodec->DMA_COMMAND_local[2] = ((16+1-4)<<24) | 4 << 20 | ((XDIM*4)-63)<<6 | 32;
pCodec->DMA_COMMAND_local[2+DMA_COMMAND_QUEUE_STRIDE] = ((16+1-4)<<24) | 4 << 20 | ((XDIM*4)-63)<<6 | 32;
pCodec->DMA_COMMAND_local[3] = 0x4A50000 | 192; // enable Transfer Done flag mask
// Enable DMA start transferring
// Enable chain transfer
// From 2D System memory to 2D Local memory
// transfer 192 words (768 bytes = 3 Y blocks)
pCodec->DMA_COMMAND_local[3+DMA_COMMAND_QUEUE_STRIDE] = 0x4A50000 | 192; // enable Transfer Done flag mask
// Enable DMA start transferring
// Enable chain transfer
// From 2D System memory to 2D Local memory
// transfer 192 words (768 bytes = 3 Y blocks)
// since we use DMA double buffer, but for the very first time, we will move the reference image to
// local memory twice by using the upper part of DMA double buffer
// so, let's increment it for just 64 bytes
pCodec->DMA_COMMAND_local[4] = ((uint32_t) pEnc->reference->reconstruct.u - (64*XDIM/16) | 0x04);
// since we use DMA double buffer, so the increment for U block of reference image is 128 bytes instead of 64 bytes
pCodec->DMA_COMMAND_local[4+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->reference->reconstruct.u - (64*XDIM/16) + 128 | 0x05); // 4 - 7 for Ref U Load Next MB
pCodec->DMA_COMMAND_local[5] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(REF_U);
pCodec->DMA_COMMAND_local[5+DMA_COMMAND_QUEUE_STRIDE] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t)REF_U+16);
pCodec->DMA_COMMAND_local[6] = (uint32_t) ((8+1-2)<<24) | 2 << 20 | ((XDIM)-15)<<6 | 8;
pCodec->DMA_COMMAND_local[6+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) ((8+1-2)<<24) | 2 << 20 | ((XDIM)-15)<<6 | 8;
pCodec->DMA_COMMAND_local[7] = 0x4A50000 | 48; // chain enable,
pCodec->DMA_COMMAND_local[7+DMA_COMMAND_QUEUE_STRIDE] = 0x4A50000 | 48; // chain enable,
// since we use DMA double buffer, but for the very first time, we will move the reference image to
// local memory twice by using the upper part of DMA double buffer
// so, let's increment it for just 64 bytes
pCodec->DMA_COMMAND_local[8] = ((uint32_t) pEnc->reference->reconstruct.v - (64*XDIM/16) | 0x04); // 16 - 19 for Ref V Load Next MB
// since we use DMA double buffer, so the increment for V block of reference image is 128 bytes instead of 64 bytes
pCodec->DMA_COMMAND_local[8+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->reference->reconstruct.v - (64*XDIM/16) +128 | 0x05); // 16 - 19 for Ref V Load Next MB
pCodec->DMA_COMMAND_local[9] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(REF_V);
pCodec->DMA_COMMAND_local[9+DMA_COMMAND_QUEUE_STRIDE] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t)REF_V+16);
pCodec->DMA_COMMAND_local[10] = (uint32_t) ((8+1-2)<<24) | 2 << 20 | ((XDIM)-15)<<6 | 8;
pCodec->DMA_COMMAND_local[10+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) ((8+1-2)<<24) | 2 << 20 | ((XDIM)-15)<<6 | 8;
pCodec->DMA_COMMAND_local[11] = 0x4A50000 | 48; // chain disable
pCodec->DMA_COMMAND_local[11+DMA_COMMAND_QUEUE_STRIDE] = 0x4A50000 | 48; // chain disable
// set the related DMA commands to move the current blocks to local memory
// since we use DMA double buffer, so the increment for Y block of current image is 512 bytes instead of 256 bytes
pCodec->DMA_COMMAND_local[12] = ((uint32_t) pEnc->current1->image.y | 0x07);
pCodec->DMA_COMMAND_local[12+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->current1->image.y + 256 | 0x07);
pCodec->DMA_COMMAND_local[13] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(CUR_Y0);
pCodec->DMA_COMMAND_local[13+DMA_COMMAND_QUEUE_STRIDE] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(CUR_Y1);
pCodec->DMA_COMMAND_local[15] = (uint32_t) 0x4A00040; // enable Transfer Done flag mask
// Enable DMA start transferring
// Enable chain transfer
// From sequential System memory to sequential Local memory
// transfer 0x40 words (256 bytes)
pCodec->DMA_COMMAND_local[15+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0x4A00040;
// since we use DMA double buffer, so the increment for U block of current image is 128 bytes instead of 64 bytes
pCodec->DMA_COMMAND_local[16] = ((uint32_t) pEnc->current1->image.u | 0x05);
pCodec->DMA_COMMAND_local[16+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->current1->image.u + 64 | 0x05);
pCodec->DMA_COMMAND_local[17] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(CUR_U0);
pCodec->DMA_COMMAND_local[17+DMA_COMMAND_QUEUE_STRIDE] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(CUR_U1);
pCodec->DMA_COMMAND_local[19] = (uint32_t) 0x4A00010; // enable Transfer Done flag mask
// Enable DMA start transferring
// Enable chain transfer
// From sequential System memory to sequential Local memory
// transfer 0x10 words (64 bytes)
pCodec->DMA_COMMAND_local[19+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0x4A00010;
// since we use DMA double buffer, so the increment for V block of current image is 128 bytes instead of 64 bytes
pCodec->DMA_COMMAND_local[20] = ((uint32_t) pEnc->current1->image.v | 0x05);
pCodec->DMA_COMMAND_local[20+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->current1->image.v + 64 | 0x05);
pCodec->DMA_COMMAND_local[21] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(CUR_V0);
pCodec->DMA_COMMAND_local[21+DMA_COMMAND_QUEUE_STRIDE] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(CUR_V1);
pCodec->DMA_COMMAND_local[23] = (uint32_t) 0x4A00010; // enable Transfer Done flag mask
// Enable DMA start transferring
// Enable chain transfer
// From sequential System memory to sequential Local memory
// transfer 0x10 words (64 bytes)
pCodec->DMA_COMMAND_local[23+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0x4A00010;
// prepare the DMA commands for loading predictor
pCodec->DMA_COMMAND_local[36] = (uint32_t) pCodec->pred_value_phy; // predictor
pCodec->DMA_COMMAND_local[36+ DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) (pCodec->pred_value_phy); // predictor
pCodec->DMA_COMMAND_local[37] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(LOCAL_PREDICTOR0);
pCodec->DMA_COMMAND_local[37+DMA_COMMAND_QUEUE_STRIDE] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS(LOCAL_PREDICTOR0);
pCodec->DMA_COMMAND_local[39] = (uint32_t) 0x840010; // disable Transfer Done flag mask
// Enable DMA start transferring
// Disable chain transfer
// From sequential System memory to 2D Local memory
// transfer 0x10 words (64 bytes)
pCodec->DMA_COMMAND_local[39+DMA_COMMAND_QUEUE_STRIDE] = (uint32_t) 0x840010;
pCodec->DMA_COMMAND_local[24] = ((uint32_t) pEnc->current1->reconstruct.y + 256) | 0x07;
pCodec->DMA_COMMAND_local[24+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->current1->reconstruct.y) | 0x07;
pCodec->DMA_COMMAND_local[28] = ((uint32_t) pEnc->current1->reconstruct.u + 64) | 0x05;
pCodec->DMA_COMMAND_local[28+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->current1->reconstruct.u) | 0x05;
pCodec->DMA_COMMAND_local[32] = ((uint32_t) pEnc->current1->reconstruct.v + 64) | 0x05;
pCodec->DMA_COMMAND_local[32+DMA_COMMAND_QUEUE_STRIDE] = ((uint32_t) pEnc->current1->reconstruct.v) | 0x05;
// begin to move the local DMA commands (from 0th to 0+40(0x28)-1=39th)
// to the system memory and start DMA procedure..
DMA_MOVE(0, 0x4B00028) // enable Transfer Done flag mask
// Enable DMA start transferring
// Enable chain transfer
// From sequential Local memory to sequential System memory
// transfer 0x28(40) words
RTL_DEBUG_OUT(0x95000000 | (uint32_t) pCodec->pred_value_phy)
pEnc->current1->fcode = pEnc->mbParam.m_fcode;
READ_ASADR(*pBits) // 64 byte
READ_BALR(x) // compressed data in local memory (words)
READ_VOADR(y) // compressed data in local memory (bits)
x = (x & 0x3c);
y = y & 0xff;
bit_header = y + x*8; // total bits in local memory
pEnc->current1->coding_type = P_VOP;
if (pEnc->mbParam.h263) {
pEnc->mbParam.m_rounding_type = 0;
pEnc->current1->rounding_type = pEnc->mbParam.m_rounding_type;
BitstreamWriteShortHeader(&pEnc->mbParam, pEnc->current1, 1,pEnc->pCodec);
} else {
pEnc->mbParam.m_rounding_type = 1 - pEnc->mbParam.m_rounding_type;
pEnc->current1->rounding_type = pEnc->mbParam.m_rounding_type;
if (vol_header)
BitstreamWriteVolHeader(&pEnc->mbParam, pEnc->current1,pEnc->pCodec);
BitstreamWriteVopHeader(&pEnc->mbParam, pEnc->current1, 1,pEnc->pCodec);
}
// check DMA is done
POLL_MARKER_S
while((pmdma->Status & 0x1) == 0);
POLL_MARKER_E
// we just want to move the reference image again, so we disable the chain
pCodec->DMA_COMMAND_local[1] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t)REF_Y + 16);
pCodec->DMA_COMMAND_local[5] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t)REF_U + 8);
pCodec->DMA_COMMAND_local[9] = TRANSLATE_LOCAL_MEMORY_BASE_ADDRESS((uint32_t)REF_V + 8);
pCodec->DMA_COMMAND_local[11] = 0x850000 | 48;
// begin to move the reference image to local memory again (from 0th to 0+12(0xC)-1=11th)
// by using the upper part of DMA command buffer
DMA_MOVE(0, 0x4B0000C) // enable Transfer Done flag mask
// Enable DMA start transferring
// Enable chain transfer
// From sequential Local memory to sequential System memory
// transfer 0xC(12) words
// The MCCADDR register is set only once, here, because during P-frame
// encoding (unlike I-frame encoding) the ME engine, while it is activated,
// also copies the current blocks to the address that was set in the
// MCCADDR register.
SET_MCCADDR(CUR_Y2)
SET_QAR(DZQAR) // quantization table
RTL_DEBUG_OUT(0x91000000)
pCodec->even_odd_1 = 0;
pCodec->even_odd_I = 0;
pCodec->acdc_status = 7;
pCodec->triple_buffer_selector = 0;
pMB = pEnc->current1->mbs;
pMB->quant = pEnc->current1->quant;
// initialize Raddr and Raddr23 in word address
pCodec->Raddr = (((uint32_t) REF_Y + 64*16) >> 2) & 0xfff;
pCodec->Raddr23 = (((uint32_t) REF_Y + 64*(16+8)) >> 2) & 0xfff; //for block 2,3
#ifdef DUMP_PMV_RESULT
fprintf(pmv_result_file,"Macroblock %d (x=%d,y=%d)\n",0,0,0);
#endif
bIntra = MotionEstimation_block0(pMB,pEnc);
#ifdef DUMP_ME_RESULT
fprintf(me_result_file,"Macroblock %d (x=%d,y=%d) :",0,0,0);
if(pEnc->mbParam.enable_4mv)
fprintf(me_result_file,"0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x\n",(pMB->mv16x_0&0x07f), (pMB->mv16y_0&0x07f), (pMB->mv16x_1&0x07f), (pMB->mv16y_1&0x07f), (pMB->mv16x_2&0x07f), (pMB->mv16y_2&0x07f), (pMB->mv16x_3&0x07f), (pMB->mv16y_3&0x07f));
else
// to be compatible with C Model encoder's output format
fprintf(me_result_file,"0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x, 0x%04x\n",(pMB->mv16x_3&0x07f), (pMB->mv16y_3&0x07f), (pMB->mv16x_3&0x07f), (pMB->mv16y_3&0x07f), (pMB->mv16x_3&0x07f), (pMB->mv16y_3&0x07f), (pMB->mv16x_3&0x07f), (pMB->mv16y_3&0x07f));
#endif
RTL_DEBUG_OUT(0x92000000)
x = 1;
data_64b = 2;
for (y = 0; y < pEnc->mbParam.mb_height; y++) {
for ( ; x < pEnc->mbParam.mb_width; x++) {
pMB->quant = pEnc->current1->quant;
if (pEnc->mbParam.resyn==1)
{
// For the resync marker: once the resync marker is enabled, the bound for
// PMV (performed by hardware) must be set, so we set bit 3 of the MECTL
// register to notify the hardware
if(x==0) pCodec->ME_COMMAND|=8;
if (!pEnc->mbParam.h263)
{
if ((y!=0) & (x==1))
{
// In core_version_1, the hardware register (CPSTS) did not provide a
// bit to check whether the VLC engine is done or not. Without that bit,
// the following code could cause bitstream buffer contention in
// core_version_1 if we insert the resync marker into the bitstream
// buffer while the VLC is not done and is still accessing the same
// bitstream buffer. Therefore, core_version_2 hardware provides
// bit 15 of the CPSTS register to let software check the VLC done
// status in order to avoid bitstream buffer contention.
// check whether the VLC is done or not
// CPSTS register is at address 0x10028
int32_t cpsts;
do {
READ_CPSTS(cpsts)
} while (!(cpsts&0x08000));
BitstreamPadAlways(pCodec);
BitstreamPutBits(VIDO_RESYN_MARKER, 17,pCodec);
BitstreamPutBits((x-1) + y*pEnc->mbParam.mb_width, log2bin(pEnc->mbParam.mb_width * pEnc->mbParam.mb_height - 1),pCodec);
BitstreamPutBits(pMB->quant, 5,pCodec);
BitstreamPutBit(0,pCodec);
}
}
else
{
if ((y!=0) & (x==1))
{
// In core_version_1, the hardware register (CPSTS) did not provide a
// bit to check whether the VLC engine is done or not. Without that bit,
// the following code could cause bitstream buffer contention in
// core_version_1 if we insert the resync marker into the bitstream
// buffer while the VLC is not done and is still accessing the same
// bitstream buffer. Therefore, core_version_2 hardware provides
// bit 15 of the CPSTS register to let software check the VLC done
// status in order to avoid bitstream buffer contention.
// check whether the VLC is done or not
// CPSTS register is at address 0x10028
int32_t cpsts;
do {
READ_CPSTS(cpsts)
} while (!(cpsts&0x08000));
BitstreamPutBits(VIDO_RESYN_MARKER, 17,pCodec);
BitstreamPutBits(y, 5,pCodec);
BitstreamPutBits(0, 2,pCodec); // ID
BitstreamPutBits(pMB->quant, 5,pCodec);
}
}
}
pMB_mc = pMB;
pMB++;
pCodec->even_odd_1 ^= 1;
pCodec->triple_buffer_selector = (++pCodec->triple_buffer_selector) % 3;
RTL_DEBUG_OUT(0x91000000 | y << 12 | x)
#ifdef DUMP_PMV_RESULT
fprintf(pmv_result_file,"Macroblock %d (x=%d,y=%d)\n",x+y*pEnc->mbParam.mb_width,x,y);
#endif
bIntra = MotionEstimation(pMB, pMB_mc, x, y, &pEnc->mbParam, pEnc, data_64b);
#ifdef DUMP_ME_RESU
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -