// quant1_h263.asm
// (web code-viewer residue: "Font size:")
.global _xhQuantInvIntra_H263_C1I;
.global _xhQuantInv_H263_C1I;
.global _xhQuantIntra_H263_C1I;
.global _xhQuant_H263_C1I;
.extern _mZigZagScan;
.section L1_data_b;
.global _multipliers_H263;
//liu 20061025
.var _multipliers_H263[32] =
{
0x0, 0x8001, 0x4001, 0x2aab,
0x2001, 0x199a, 0x1556, 0x124a,
0x1001, 0x0e39, 0x0ccd, 0x0ba3,
0x0aab, 0x09d9, 0x0925, 0x0889,
0x0801, 0x0788, 0x071d, 0x06bd,
0x0667, 0x0619, 0x05d2, 0x0591,
0x0556, 0x051f, 0x04ed, 0x04be,
0x0493, 0x046a, 0x0445, 0x0422
};
.var _multipliers_H263_P[32] =
{
0x00000000, 0x80018001, 0x40014001, 0x2aab2aab,
0x20012001, 0x199a199a, 0x15561556, 0x124a124a,
0x10011001, 0x0e390e39, 0x0ccd0ccd, 0x0ba30ba3,
0x0aab0aab, 0x09d909d9, 0x09250925, 0x08890889,
0x08010801, 0x07880788, 0x071d071d, 0x06bd06bd,
0x06670667, 0x06190619, 0x05d205d2, 0x05910591,
0x05560556, 0x051f051f, 0x04ed04ed, 0x04be04be,
0x04930493, 0x046a046a, 0x04450445, 0x04220422
};
/*
.var _multipliers_H263_P[32] =
{
0x00000000, 0x80008000, 0x40004000, 0x2aab2aaa,
0x20002000, 0x19991999, 0x15551555, 0x12491249,
0x10001000, 0x0e380e38, 0x0ccc0ccc, 0x0ba20ba2,
0x0aaa0aaa, 0x09d809d8, 0x09240924, 0x08880888,
0x08000800, 0x07870787, 0x071c071c, 0x06bc06bc,
0x06660666, 0x06180618, 0x05d105d1, 0x05900590,
0x05550555, 0x051e051e, 0x04ec04ec, 0x04bd04bd,
0x04920492, 0x04690469, 0x04440444, 0x04210421
};
*/
.section L1_code;
.align 4;
#if 0
/*******************************************************
performance:
ASM C
cycle count:1312 4451
Ipp32u xhQuantInvIntra_H263_C1I( Ipp16s* pSrcDst, int QP);
*****************dequant_intra_h263*******************/
_xhQuantInvIntra_H263_C1I:
[--sp]=(r7:4,p5:3);
[--sp]=i0;
[--sp]=l0;
[--sp]=i1;
[--sp]=l1;
[--sp]=rets;
l0=0;
i0=r0;
l1=0;
i1=r0;
r3=r1<<1; //quant_m_2
cc=bittst(r1,0);
r5=1;
r6=r1;
r1=r1-r5;
if cc r1=r6; //quant_add
m0=2;
i0+=m0;//to start from ac coeff,skip the dc coeff
i1=i0;
r0.l=w[i0++];//load the data
p0=63;
lsetup (ac_loop_h263_intra_inv_start,ac_loop_h263_intra_inv_end) lc0=p0;
ac_loop_h263_intra_inv_start:
r0=r0.l(x);
cc=r0;
r7=r0;
if !cc jump ac_loop_store;
cc=bittst(r0,31);
r2=abs r0;
r2*=r3;
r7=r2+r1;
if cc jump negative;
r4=2047;
cc=r7<r4;
if !cc r7=r4;
jump ac_loop_store;
negative:
r4=2048;
cc=r7<r4;
r7=-r7;
r4=-r4;
if !cc r7=r4;
ac_loop_store:
w[i1++]=r7.l;
ac_loop_h263_intra_inv_end:
r0.l=w[i0++];
rets=[sp++];
l1=[sp++];
i1=[sp++];
l0=[sp++];
i0=[sp++];
(r7:4,p5:3)=[sp++];
_xhQuantInvIntra_H263_C1I.end:
rts;
#else
/************changed by gary 2007-06-28****************
performance:
ASM C
cycle count:
Ipp32u xhQuantInvIntra_H263_C1I( Ipp16s* pSrcDst, int QP);
*****************dequant_intra_h263*******************/
_xhQuantInvIntra_H263_C1I:
[--sp]=(r7:4,p5:3);
[--sp]=rets;
l0=0;
l1=0;
i0=r0;
b0=r0; //store base addr;
i1=r0;
r0=r1<<1; //quant_m_2
r3=r0<<16;
r3=r3+r0; //r3.l=r3.h=r1<<1;
cc=bittst(r1,0);
r5=1;
r6=r1;
r1=r1-r5;
if cc r1=r6; //quant_add
r6=r1<<16;
r2=r6+r1; //r2.l=r2.h=r1.l=r6.h, r1.h=r6.l=0;
p0=32;
r0=[i0++]; //load the data
r7=r0.l(z);
r0.l=0;
p5=r7;
r7=0;
lsetup (ac_loop_h263_intra_inv_start,ac_loop_h263_intra_inv_end) lc0=p0;
ac_loop_h263_intra_inv_start:
r5=abs r0(v);
r5.l=r5.l*r3.l, r5.h=r5.h*r3.h(iu);
r5=r5+|+r2(s);
r4=r5.l(z);
r5.l=0;
cc= r4==r1;
if cc r4=r7; //reset to 0;
cc= r5==r6;
if cc r5=r7; //reset to 0;
r4.h=r4.l=sign(r0.h)*r4.h+sign(r0.l)*r4.l;
r5.h=r5.l=sign(r0.h)*r5.h+sign(r0.l)*r5.l;
r4=pack(r5.l,r4.l);
r4=r4<<4(v,s);
r4=r4>>>4(v);
ac_loop_h263_intra_inv_end:
[i1++]=r4 || r0=[i0++];
r7=p5;
i0=b0;
w[i0]=r7.l; //restore dc;
rets=[sp++];
(r7:4,p5:3)=[sp++];
_xhQuantInvIntra_H263_C1I.end:
rts;
#endif //end of inv intra quant func selection
#if 0
/*********************************************************
*******************QuantInv_H263_C1I**********************
performance:
ASM C
cycle count:992 3072
Ipp32u xhQuantInv_H263_C1I( Ipp16s* pSrcDst, int QP );
*********************************************************/
_xhQuantInv_H263_C1I:
[--sp]=(r7:4,p5:3);
[--sp]=i0;
[--sp]=l0;
[--sp]=i1;
[--sp]=l1;
[--sp]=rets;
l0=0;
i0=r0;
l1=0;
i1=r0;
r3=r1<<1; //quant_m_2
cc=bittst(r1,0);
r5=1;
r6=r1;
r1=r1-r5;
if cc r1=r6; //quant_add
r0.l=w[i0++];
p0=64;
lsetup (acdc_loop_h263_inter_inv_start,acdc_loop_h263_inter_inv_end)lc0=p0;
acdc_loop_h263_inter_inv_start:
r0=r0.l(x);
cc=r0;
r7=r0;
if !cc jump acdc_zero;
cc=bittst(r0,31);
r2=abs r0;
r2*=r3;
r7=r2+r1;
if cc jump acdc_negative;
r4=2047;
cc=r7<r4;
if !cc r7=r4;
jump acdc_zero;
acdc_negative:
r4=2048;
cc=r7<r4;
r7=-r7;
r4=-r4;
if !cc r7=r4;
acdc_zero:
w[i1++]=r7.l;
acdc_loop_h263_inter_inv_end:
r0.l=w[i0++];
rets=[sp++];
l1=[sp++];
i1=[sp++];
l0=[sp++];
i0=[sp++];
(r7:4,p5:3)=[sp++];
_xhQuantInv_H263_C1I.end:
rts;
#else
/*******************QuantInv_H263_C1I**********************
*************changed by gary *****************************
performance:
ASM C
cycle count:
Ipp32u xhQuantInv_H263_C1I( Ipp16s* pSrcDst, int QP );
*********************************************************/
_xhQuantInv_H263_C1I:
[--sp]=(r7:4,p5:3);
[--sp]=rets;
l0=0;
l1=0;
i0=r0;
i1=r0;
r0=r1<<1; //quant_m_2
r3=r0<<16;
r3=r3+r0; //r3.l=r3.h=r1<<1;
cc=bittst(r1,0);
r5=1;
r6=r1;
r1=r1-r5;
if cc r1=r6; //quant_add
r6=r1<<16;
r2=r6+r1; //r2.l=r2.h=r1.l=r6.h, r1.h=r6.l=0;
r7=0;
p0=32;
r0=[i0++]; //load the data
lsetup (acdc_loop_h263_inter_inv_start,acdc_loop_h263_inter_inv_end) lc0=p0;
acdc_loop_h263_inter_inv_start:
r5=abs r0(v);
r5.l=r5.l*r3.l, r5.h=r5.h*r3.h(iu);
r5=r5+|+r2(s);
r4=r5.l(z);
r5.l=0;
cc = r4==r1;
if cc r4=r7; //reset to 0;
cc = r5==r6;
if cc r5=r7; //reset to 0;
r4.h=r4.l=sign(r0.h)*r4.h+sign(r0.l)*r4.l;
r5.h=r5.l=sign(r0.h)*r5.h+sign(r0.l)*r5.l;
r4=pack(r5.l,r4.l);
r4=r4<<4(v,s);
r4=r4>>>4(v);
acdc_loop_h263_inter_inv_end:
[i1++]=r4 || r0=[i0++];
rets=[sp++];
(r7:4,p5:3)=[sp++];
_xhQuantInv_H263_C1I.end:
rts;
#endif
#if 0
/***********************************************************
*****************quant_h263_intra**************************
performance:
ASM C
cycle count:1709 6442
Ipp32u xhQuantIntra_H263_C1I(Ipp16s* pSrcDst,Ipp32s QP,Ipp32s* pCount);
************************************************************/
_xhQuantIntra_H263_C1I:
[--sp]=(r7:4,p5:3);
[--sp]=i0;
[--sp]=l0;
[--sp]=i1;
[--sp]=l1;
[--sp]=i2;
[--sp]=l2;
[--sp]=i3;
[--sp]=l3;
[--sp]=rets;
l0=0;
i0=r0;
b0=r0;//reserve for the calculate the pcount
l1=0;
l3=0;
i1=r0;
r7=r1<<1;
l2=0;
i2.l=_multipliers_H263;
i2.h=_multipliers_H263;
i3.l=_mZigZagScan;
i3.h=_mZigZagScan;
m0=2;
r4=r1<<2;
m2=r4;
i2+=m2;
//p3=8;
i0+=m0;
i1+=m0;//to start from ac coeff
// p5=i0;
//p4=i1;
p0=63;
r0=[i2];//mult
r4.l=w[i0++];
lsetup (ac_loop_h263_intra_start,ac_loop_h263_intra_end)lc0=p0;
ac_loop_h263_intra_start:
r4=r4.l(x);
r5=abs r4;
cc=r5<r7;
//liu 20061010 changed start
r6=0;
if cc jump ac_zero;
r5*=r0;
r5>>=16;//_SCALEBITS_H263
cc=bittst(r4,31);
r6=-r5;
if !cc r6=r5;
//liu changed end
/* r6=0;
if cc jump ac_zero;
r6=r5.l*r0.l;
r6>>=16;//_SCALEBITS_H263
cc=bittst(r4,31);
r5=-r6;
if cc r6=r5;
*/
ac_zero:
w[i1++]=r6.l;
ac_loop_h263_intra_end:
r4.l=w[i0++];
i1=r2;//pcount
//w[i1]=-1;
p3=i3;
p3+=1;
p0=63;
r7=b[p3++](z);
r3=-1;//in order to conform with the ippi
r6=b0;
lsetup(pcount_start,pcount_end)lc0=p0;
pcount_start:
r4=r7<<1;
r4=r6+r4;
i0=r4;
r5.l=w[i0];
r5=r5.l(x);
cc=r5;
if cc r3=r7;
pcount_end:
r7=b[p3++](z);
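// (web code-viewer residue, not part of the program) Keyboard shortcuts:
// Copy code: Ctrl + C
// Search code: Ctrl + F
// Full-screen mode: F11
// Switch theme: Ctrl + Shift + D
// Show shortcuts: ?
// Increase font size: Ctrl + =
// Decrease font size: Ctrl + -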