?? transferidct_mmx.c

?? MPEG4編解C源代碼,已移植到ARM和DSP上.
?? C
字號:
/**************************************************************************
 *                                                                        *
 * This code has been developed by John Funnell. This software is an      *
 * implementation of a part of one or more MPEG-4 Video tools as          *
 * specified in ISO/IEC 14496-2 standard.  Those intending to use this    *
 * software module in hardware or software products are advised that its  *
 * use may infringe existing patents or copyrights, and any such use      *
 * would be at such party's own risk.  The original developer of this     *
 * software module and his/her company, and subsequent editors and their  *
 * companies (including Project Mayo), will have no liability for use of  *
 * this software or modifications or derivatives thereof.                 *
 *                                                                        *
 * Project Mayo gives users of the Codec a license to this software       *
 * module or modifications thereof for use in hardware or software        *
 * products claiming conformance to the MPEG-4 Video Standard as          *
 * described in the Open DivX license.                                    *
 *                                                                        *
 * The complete Open DivX license can be found at                         *
 * http://www.projectmayo.com/opendivx/license.php                        *
 *                                                                        *
 **************************************************************************/
/**
*  Copyright (C) 2001 - Project Mayo
 *
 * John Funnell
 *
 * DivX Advanced Research Center <darc@projectmayo.com>
**/
// transferIDCT_mmx.c //

/* routines to transfer iDCT block result into the output picture */

/* this routine still needs to be optimised for pipelining        */
/* just haven't got time to finish the optimisations here ;-)     */

/* the MMX code is MSVC-style inline assembly (MASM syntax), not GNU inline-asm syntax */



//#define _TEST_TRANSFER

#ifdef _TEST_TRANSFER
#include <stdio.h>
#endif


#include "portab.h"

/*
 * transferIDCT_add
 *
 * Adds an 8x8 block of signed 16-bit iDCT residuals to an 8x8 block of
 * unsigned 8-bit pixels, in place, clamping every result to 0..255.
 *
 *   sourceS16  64 contiguous int16_t values, row-major (8 per row)
 *   destU8     top-left pixel of the 8x8 destination block (read and written)
 *   stride     distance in bytes between consecutive destination rows
 *
 * On 32-bit x86 MSVC-compatible compilers the work is done with MMX inline
 * assembly; every other target uses the plain C path below, which yields
 * the same bytes.
 */
void transferIDCT_add(int16_t *sourceS16, uint8_t *destU8, int stride) {
	#ifdef _TEST_TRANSFER
	uint8_t reference_dest[64];
	int x, y, sum16;

	/* populate reference_dest[] with the correct result... */
	for (y=0; y<8; y++) {
		for (x=0; x<8; x++) {
			sum16 = (destU8[stride*y + x] + sourceS16[8*y + x]);
			if      (sum16 > 255) reference_dest[8*y + x] = 255;
			else if (sum16 <   0) reference_dest[8*y + x] =   0;
			else                  reference_dest[8*y + x] = (uint8_t)sum16;
		}
	}
	#endif

#if defined(_MSC_VER) && defined(_M_IX86)

	/*
	 * Register roles inside the block:
	 *   eax = source row pointer (fixed; rows addressed by constant offset)
	 *   ebx = destination row pointer (advanced by stride after each row)
	 *   edi = stride
	 *   mm7 = zero, used to widen bytes to words
	 * No manual push/pop is needed: the compiler saves and restores the
	 * general registers that an __asm block modifies.
	 */
	__asm {

	mov eax, sourceS16
	mov ebx, destU8
	mov edi, stride
	pxor mm7, mm7                   ;  mm7 = 0

	; ---- row 0 ----
	movq mm0, qword ptr [ebx]       ;  load 8 destination pixels
	movq mm1, mm0                   ;  second copy for the upper 4 pixels
	punpcklbw mm0, mm7              ;  pixels 0..3 widened to words
	punpckhbw mm1, mm7              ;  pixels 4..7 widened to words
	paddsw mm0, qword ptr [eax]     ;  add residuals 0..3 (signed saturate)
	paddsw mm1, qword ptr [eax+8]   ;  add residuals 4..7 (signed saturate)
	packuswb mm0, mm1               ;  clamp to 0..255 and repack to bytes
	movq qword ptr [ebx], mm0       ;  store the row
	add ebx, edi                    ;  next destination row

	; ---- row 1 ----
	movq mm2, qword ptr [ebx]
	movq mm3, mm2
	punpcklbw mm2, mm7
	punpckhbw mm3, mm7
	paddsw mm2, qword ptr [eax+16]
	paddsw mm3, qword ptr [eax+24]
	packuswb mm2, mm3
	movq qword ptr [ebx], mm2
	add ebx, edi

	; ---- row 2 ----
	movq mm4, qword ptr [ebx]
	movq mm5, mm4
	punpcklbw mm4, mm7
	punpckhbw mm5, mm7
	paddsw mm4, qword ptr [eax+32]
	paddsw mm5, qword ptr [eax+40]
	packuswb mm4, mm5
	movq qword ptr [ebx], mm4
	add ebx, edi

	; ---- row 3 ----
	movq mm0, qword ptr [ebx]
	movq mm1, mm0
	punpcklbw mm0, mm7
	punpckhbw mm1, mm7
	paddsw mm0, qword ptr [eax+48]
	paddsw mm1, qword ptr [eax+56]
	packuswb mm0, mm1
	movq qword ptr [ebx], mm0
	add ebx, edi

	; ---- row 4 ----
	movq mm2, qword ptr [ebx]
	movq mm3, mm2
	punpcklbw mm2, mm7
	punpckhbw mm3, mm7
	paddsw mm2, qword ptr [eax+64]
	paddsw mm3, qword ptr [eax+72]
	packuswb mm2, mm3
	movq qword ptr [ebx], mm2
	add ebx, edi

	; ---- row 5 ----
	movq mm4, qword ptr [ebx]
	movq mm5, mm4
	punpcklbw mm4, mm7
	punpckhbw mm5, mm7
	paddsw mm4, qword ptr [eax+80]
	paddsw mm5, qword ptr [eax+88]
	packuswb mm4, mm5
	movq qword ptr [ebx], mm4
	add ebx, edi

	; ---- row 6 ----
	movq mm0, qword ptr [ebx]
	movq mm1, mm0
	punpcklbw mm0, mm7
	punpckhbw mm1, mm7
	paddsw mm0, qword ptr [eax+96]
	paddsw mm1, qword ptr [eax+104]
	packuswb mm0, mm1
	movq qword ptr [ebx], mm0
	add ebx, edi

	; ---- row 7 ----
	movq mm2, qword ptr [ebx]
	movq mm3, mm2
	punpcklbw mm2, mm7
	punpckhbw mm3, mm7
	paddsw mm2, qword ptr [eax+112]
	paddsw mm3, qword ptr [eax+120]
	packuswb mm2, mm3
	movq qword ptr [ebx], mm2

	emms                            ;  leave MMX state so x87 code can run

	}

#else

	/* Portable path: same arithmetic, one pixel at a time. */
	{
		int row, col, sum;

		for (row = 0; row < 8; row++) {
			uint8_t       *dstRow = destU8 + stride * row;
			const int16_t *srcRow = sourceS16 + 8 * row;

			for (col = 0; col < 8; col++) {
				sum = dstRow[col] + srcRow[col];
				if (sum > 255) sum = 255;
				if (sum <   0) sum =   0;
				dstRow[col] = (uint8_t)sum;
			}
		}
	}

#endif

	#ifdef _TEST_TRANSFER
	/* check destination against reference_dest[]... */
	for (y=0; y<8; y++) {
		for (x=0; x<8; x++) {
			if (reference_dest[8*y + x] != destU8[stride*y + x]) printf("transferIDCT_add() is broken\n");
		}
	}
	#endif

}

/*
 * transferIDCT_copy
 *
 * Writes an 8x8 block of signed 16-bit iDCT results into an 8x8 block of
 * unsigned 8-bit pixels, clamping every value to 0..255.  The previous
 * contents of the destination block are overwritten.
 *
 *   sourceS16  64 contiguous int16_t values, row-major (8 per row)
 *   destU8     top-left pixel of the 8x8 destination block
 *   stride     distance in bytes between consecutive destination rows
 *
 * On 32-bit x86 MSVC-compatible compilers the work is done with MMX inline
 * assembly; every other target uses the plain C path below, which yields
 * the same bytes.
 */
void transferIDCT_copy(int16_t *sourceS16, uint8_t *destU8, int stride) {
	#ifdef _TEST_TRANSFER
	int x, y, clipped;
	#endif

#if defined(_MSC_VER) && defined(_M_IX86)

	/*
	 * Register roles inside the block:
	 *   eax = source pointer (fixed; rows addressed by constant offset)
	 *   ebx = destination row pointer (advanced by stride after each row)
	 *   edi = stride
	 * No manual push/pop is needed: the compiler saves and restores the
	 * general registers that an __asm block modifies.
	 */
	__asm {

	mov eax, sourceS16
	mov ebx, destU8
	mov edi, stride

	; ---- rows 0..3: clamp-and-pack each row into one MMX register ----
	movq mm0, qword ptr [eax]          ;  row 0, words 0..3
	packuswb mm0, qword ptr [eax+8]    ;  row 0, all 8 values as clamped bytes
	movq mm1, qword ptr [eax+16]       ;  row 1, words 0..3
	packuswb mm1, qword ptr [eax+24]   ;  row 1 packed
	movq mm2, qword ptr [eax+32]       ;  row 2, words 0..3
	packuswb mm2, qword ptr [eax+40]   ;  row 2 packed
	movq mm3, qword ptr [eax+48]       ;  row 3, words 0..3
	packuswb mm3, qword ptr [eax+56]   ;  row 3 packed

	movq qword ptr [ebx], mm0          ;  store row 0
	add ebx, edi
	movq qword ptr [ebx], mm1          ;  store row 1
	add ebx, edi
	movq qword ptr [ebx], mm2          ;  store row 2
	add ebx, edi
	movq qword ptr [ebx], mm3          ;  store row 3
	add ebx, edi

	; ---- rows 4..7 ----
	movq mm0, qword ptr [eax+64]       ;  row 4, words 0..3
	packuswb mm0, qword ptr [eax+72]   ;  row 4 packed
	movq mm1, qword ptr [eax+80]       ;  row 5, words 0..3
	packuswb mm1, qword ptr [eax+88]   ;  row 5 packed
	movq mm2, qword ptr [eax+96]       ;  row 6, words 0..3
	packuswb mm2, qword ptr [eax+104]  ;  row 6 packed
	movq mm3, qword ptr [eax+112]      ;  row 7, words 0..3
	packuswb mm3, qword ptr [eax+120]  ;  row 7 packed

	movq qword ptr [ebx], mm0          ;  store row 4
	add ebx, edi
	movq qword ptr [ebx], mm1          ;  store row 5
	add ebx, edi
	movq qword ptr [ebx], mm2          ;  store row 6
	add ebx, edi
	movq qword ptr [ebx], mm3          ;  store row 7

	emms                               ;  leave MMX state so x87 code can run

	}

#else

	/* Portable path: same clamping, one pixel at a time. */
	{
		int row, col, value;

		for (row = 0; row < 8; row++) {
			uint8_t       *dstRow = destU8 + stride * row;
			const int16_t *srcRow = sourceS16 + 8 * row;

			for (col = 0; col < 8; col++) {
				value = srcRow[col];
				if (value > 255) value = 255;
				if (value <   0) value =   0;
				dstRow[col] = (uint8_t)value;
			}
		}
	}

#endif

	#ifdef _TEST_TRANSFER
	/* check every written pixel against a straightforward C clamp */
	for (y=0; y<8; y++) {
		for (x=0; x<8; x++) {
			clipped = sourceS16[8*y + x];
			if (clipped > 255) clipped = 255;
			if (clipped <   0) clipped =   0;
			if (clipped != destU8[stride*y+x]) printf("transferIDCT_copy() is broken\n");
		}
	}
	#endif

}
?? 文件大小 153 K
?? 上傳用戶 xhaibo
?? 所屬分類 DSP編程
??? 相關標簽

#MPEG4 #ARM #DSP #源代碼
?? 快捷鍵說明

復制代碼 Ctrl + C
搜索代碼 Ctrl + F
全屏模式 F11
切換主題 Ctrl + Shift + D
顯示快捷鍵 ?
增大字號 Ctrl + =
減小字號 Ctrl + -
亚洲欧美第一页_禁久久精品乱码_粉嫩av一区二区三区免费野_久草精品视频

?? transferidct_mmx.c

?? 快捷鍵說明