?? yuv2rgb_altivec.c.svn-base
字號:
img = in[0]; out = (vector unsigned char *)(oplanes[0]+srcSliceY*outstrides[0]); for (i=0;i<h;i++) { for (j=0;j<w/16;j++) { uyvy = vec_ld (0, img); U = (vector signed short) vec_perm (uyvy, (vector unsigned char)AVV(0), demux_u); V = (vector signed short) vec_perm (uyvy, (vector unsigned char)AVV(0), demux_v); Y = (vector signed short) vec_perm (uyvy, (vector unsigned char)AVV(0), demux_y); cvtyuvtoRGB (c, Y,U,V,&R0,&G0,&B0); uyvy = vec_ld (16, img); U = (vector signed short) vec_perm (uyvy, (vector unsigned char)AVV(0), demux_u); V = (vector signed short) vec_perm (uyvy, (vector unsigned char)AVV(0), demux_v); Y = (vector signed short) vec_perm (uyvy, (vector unsigned char)AVV(0), demux_y); cvtyuvtoRGB (c, Y,U,V,&R1,&G1,&B1); R = vec_packclp (R0,R1); G = vec_packclp (G0,G1); B = vec_packclp (B0,B1); // vec_mstbgr24 (R,G,B, out); out_rgba (R,G,B,out); img += 32; } } return srcSliceH;}/* Ok currently the acceleration routine only supports inputs of widths a multiple of 16 and heights a multiple 2 So we just fall back to the C codes for this.*/SwsFunc yuv2rgb_init_altivec (SwsContext *c){ if (!(c->flags & SWS_CPU_CAPS_ALTIVEC)) return NULL; /* and this seems not to matter too much I tried a bunch of videos with abnormal widths and MPlayer crashes elsewhere. mplayer -vo x11 -rawvideo on:w=350:h=240 raw-350x240.eyuv boom with X11 bad match. */ if ((c->srcW & 0xf) != 0) return NULL; switch (c->srcFormat) { case PIX_FMT_YUV410P: case PIX_FMT_YUV420P: /*case IMGFMT_CLPL: ??? */ case PIX_FMT_GRAY8: case PIX_FMT_NV12: case PIX_FMT_NV21: if ((c->srcH & 0x1) != 0) return NULL; switch(c->dstFormat){ case PIX_FMT_RGB24: av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGB24\n"); return altivec_yuv2_rgb24; case PIX_FMT_BGR24: av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGR24\n"); return altivec_yuv2_bgr24; case PIX_FMT_ARGB: av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ARGB\n"); return altivec_yuv2_argb; case PIX_FMT_ABGR: av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space ABGR\n"); return altivec_yuv2_abgr; case PIX_FMT_RGBA: av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space RGBA\n"); return altivec_yuv2_rgba; case PIX_FMT_BGRA: av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space BGRA\n"); return altivec_yuv2_bgra; default: return NULL; } break; case PIX_FMT_UYVY422: switch(c->dstFormat){ case PIX_FMT_BGR32: av_log(c, AV_LOG_WARNING, "ALTIVEC: Color Space UYVY -> RGB32\n"); return altivec_uyvy_rgb32; default: return NULL; } break; } return NULL;}void yuv2rgb_altivec_init_tables (SwsContext *c, const int inv_table[4],int brightness,int contrast, int saturation){ union { signed short tmp[8] __attribute__ ((aligned(16))); vector signed short vec; } buf; buf.tmp[0] = ((0xffffLL) * contrast>>8)>>9; //cy buf.tmp[1] = -256*brightness; //oy buf.tmp[2] = (inv_table[0]>>3) *(contrast>>16)*(saturation>>16); //crv buf.tmp[3] = (inv_table[1]>>3) *(contrast>>16)*(saturation>>16); //cbu buf.tmp[4] = -((inv_table[2]>>1)*(contrast>>16)*(saturation>>16)); //cgu buf.tmp[5] = -((inv_table[3]>>1)*(contrast>>16)*(saturation>>16)); //cgv c->CSHIFT = (vector unsigned short)vec_splat_u16(2); c->CY = vec_splat ((vector signed short)buf.vec, 0); c->OY = vec_splat ((vector signed short)buf.vec, 1); c->CRV = vec_splat ((vector signed short)buf.vec, 2); c->CBU = vec_splat ((vector signed short)buf.vec, 3); c->CGU = vec_splat ((vector signed short)buf.vec, 4); c->CGV = vec_splat ((vector signed short)buf.vec, 5);#if 0 { int i; char *v[6]={"cy","oy","crv","cbu","cgu","cgv"}; for (i=0; i<6; i++) printf("%s %d ", v[i],buf.tmp[i] ); printf("\n"); }#endif return;}voidaltivec_yuv2packedX (SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, uint8_t *dest, int dstW, int dstY){ int i,j; vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V; vector signed short R0,G0,B0,R1,G1,B1; vector unsigned char R,G,B; vector unsigned char *out,*nout; vector signed short RND = vec_splat_s16(1<<3); vector unsigned short SCL = vec_splat_u16(4); unsigned long scratch[16] __attribute__ ((aligned (16))); vector signed short *YCoeffs, *CCoeffs; YCoeffs = c->vYCoeffsBank+dstY*lumFilterSize; CCoeffs = c->vCCoeffsBank+dstY*chrFilterSize; out = (vector unsigned char *)dest; for (i=0; i<dstW; i+=16){ Y0 = RND; Y1 = RND; /* extract 16 coeffs from lumSrc */ for (j=0; j<lumFilterSize; j++) { X0 = vec_ld (0, &lumSrc[j][i]); X1 = vec_ld (16, &lumSrc[j][i]); Y0 = vec_mradds (X0, YCoeffs[j], Y0); Y1 = vec_mradds (X1, YCoeffs[j], Y1); } U = RND; V = RND; /* extract 8 coeffs from U,V */ for (j=0; j<chrFilterSize; j++) { X = vec_ld (0, &chrSrc[j][i/2]); U = vec_mradds (X, CCoeffs[j], U); X = vec_ld (0, &chrSrc[j][i/2+2048]); V = vec_mradds (X, CCoeffs[j], V); } /* scale and clip signals */ Y0 = vec_sra (Y0, SCL); Y1 = vec_sra (Y1, SCL); U = vec_sra (U, SCL); V = vec_sra (V, SCL); Y0 = vec_clip_s16 (Y0); Y1 = vec_clip_s16 (Y1); U = vec_clip_s16 (U); V = vec_clip_s16 (V); /* now we have Y0= y0 y1 y2 y3 y4 y5 y6 y7 Y1= y8 y9 y10 y11 y12 y13 y14 y15 U= u0 u1 u2 u3 u4 u5 u6 u7 V= v0 v1 v2 v3 v4 v5 v6 v7 Y0= y0 y1 y2 y3 y4 y5 y6 y7 Y1= y8 y9 y10 y11 y12 y13 y14 y15 U0= u0 u0 u1 u1 u2 u2 u3 u3 U1= u4 u4 u5 u5 u6 u6 u7 u7 V0= v0 v0 v1 v1 v2 v2 v3 v3 V1= v4 v4 v5 v5 v6 v6 v7 v7 */ U0 = vec_mergeh (U,U); V0 = vec_mergeh (V,V); U1 = vec_mergel (U,U); V1 = vec_mergel (V,V); cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0); cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1); R = vec_packclp (R0,R1); G = vec_packclp (G0,G1); B = vec_packclp (B0,B1); switch(c->dstFormat) { case PIX_FMT_ABGR: out_abgr (R,G,B,out); break; case PIX_FMT_BGRA: out_bgra (R,G,B,out); break; case PIX_FMT_ARGB: out_argb (R,G,B,out); break; case PIX_FMT_RGB24: out_rgb24 (R,G,B,out); break; case PIX_FMT_BGR24: out_bgr24 (R,G,B,out); break; default: { /* If this is reached, the caller should have called yuv2packedXinC instead. */ static int printed_error_message; if (!printed_error_message) { av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n", sws_format_name(c->dstFormat)); printed_error_message=1; } return; } } } if (i < dstW) { i -= 16; Y0 = RND; Y1 = RND; /* extract 16 coeffs from lumSrc */ for (j=0; j<lumFilterSize; j++) { X0 = vec_ld (0, &lumSrc[j][i]); X1 = vec_ld (16, &lumSrc[j][i]); Y0 = vec_mradds (X0, YCoeffs[j], Y0); Y1 = vec_mradds (X1, YCoeffs[j], Y1); } U = RND; V = RND; /* extract 8 coeffs from U,V */ for (j=0; j<chrFilterSize; j++) { X = vec_ld (0, &chrSrc[j][i/2]); U = vec_mradds (X, CCoeffs[j], U); X = vec_ld (0, &chrSrc[j][i/2+2048]); V = vec_mradds (X, CCoeffs[j], V); } /* scale and clip signals */ Y0 = vec_sra (Y0, SCL); Y1 = vec_sra (Y1, SCL); U = vec_sra (U, SCL); V = vec_sra (V, SCL); Y0 = vec_clip_s16 (Y0); Y1 = vec_clip_s16 (Y1); U = vec_clip_s16 (U); V = vec_clip_s16 (V); /* now we have Y0= y0 y1 y2 y3 y4 y5 y6 y7 Y1= y8 y9 y10 y11 y12 y13 y14 y15 U = u0 u1 u2 u3 u4 u5 u6 u7 V = v0 v1 v2 v3 v4 v5 v6 v7 Y0= y0 y1 y2 y3 y4 y5 y6 y7 Y1= y8 y9 y10 y11 y12 y13 y14 y15 U0= u0 u0 u1 u1 u2 u2 u3 u3 U1= u4 u4 u5 u5 u6 u6 u7 u7 V0= v0 v0 v1 v1 v2 v2 v3 v3 V1= v4 v4 v5 v5 v6 v6 v7 v7 */ U0 = vec_mergeh (U,U); V0 = vec_mergeh (V,V); U1 = vec_mergel (U,U); V1 = vec_mergel (V,V); cvtyuvtoRGB (c, Y0,U0,V0,&R0,&G0,&B0); cvtyuvtoRGB (c, Y1,U1,V1,&R1,&G1,&B1); R = vec_packclp (R0,R1); G = vec_packclp (G0,G1); B = vec_packclp (B0,B1); nout = (vector unsigned char *)scratch; switch(c->dstFormat) { case PIX_FMT_ABGR: out_abgr (R,G,B,nout); break; case PIX_FMT_BGRA: out_bgra (R,G,B,nout); break; case PIX_FMT_RGBA: out_rgba (R,G,B,nout); break; case PIX_FMT_ARGB: out_argb (R,G,B,nout); break; case PIX_FMT_RGB24: out_rgb24 (R,G,B,nout); break; case PIX_FMT_BGR24: out_bgr24 (R,G,B,nout); break; default: /* Unreachable, I think. */ av_log(c, AV_LOG_ERROR, "altivec_yuv2packedX doesn't support %s output\n", sws_format_name(c->dstFormat)); return; } memcpy (&((uint32_t*)dest)[i], scratch, (dstW-i)/4); }}
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -