/* Source listing: pngvcrd.c  (code-viewer page control: "Font size:") */
/* pngvcrd.c - mixed C/assembler version of utilities to read a PNG file
*
* For Intel x86 CPU and Microsoft Visual C++ compiler
*
* libpng version 1.2.7 - September 12, 2004
* For conditions of distribution and use, see copyright notice in png.h
* Copyright (c) 1998-2004 Glenn Randers-Pehrson
* Copyright (c) 1998, Intel Corporation
*
* Contributed by Nirav Chhatrapati, Intel Corporation, 1998
* Interface to libpng contributed by Gilles Vollant, 1999
*
*
* In png_do_read_interlace() in libpng versions 1.0.3a through 1.0.4d,
* a sign error in the post-MMX cleanup code for each pixel_depth resulted
* in bad pixels at the beginning of some rows of some images, and also
* (due to out-of-range memory reads and writes) caused heap corruption
* when compiled with MSVC 6.0. The error was fixed in version 1.0.4e.
*
* [png_read_filter_row_mmx_avg() bpp == 2 bugfix, GRR 20000916]
*
* [runtime MMX configuration, GRR 20010102]
*
*/
#define PNG_INTERNAL
#include "png.h"
#if defined(PNG_ASSEMBLER_CODE_SUPPORTED) && defined(PNG_USE_PNGVCRD)
/* Cached result of the runtime MMX probe performed by png_mmx_support():
 *   0 = MMX not supported, 1 = MMX supported,
 *   2 = initial value, i.e. png_mmx_support() has not stored a result yet
 *       (png_combine_row() tests for 2 and runs the probe in that case).
 */
static int mmx_supported=2;
/* png_mmx_support - probe at run time whether the CPU supports MMX.
 *
 * The probe has three stages, each of which bails out to NOT_SUPPORTED:
 *   1. Try to toggle the ID flag (0x200000) in EFLAGS; if the flag cannot be
 *      changed, the CPUID instruction is treated as unavailable.
 *   2. Execute CPUID with eax = 0 and require the value returned in eax to
 *      be at least 1, so that CPUID with eax = 1 may be issued.
 *   3. Execute CPUID with eax = 1 and test the MMX feature bit
 *      (0x00800000) of edx.
 *
 * Returns 1 if MMX is supported, 0 otherwise.  As a side effect the same
 * value is stored in the file-scope variable mmx_supported.
 */
int PNGAPI
png_mmx_support(void)
{
/* Result of the probe; stays 0 unless every stage below succeeds. */
int mmx_supported_local = 0;
_asm {
push ebx /* CPUID will trash these, so save them first */
push ecx
push edx
pushfd /* Save EFLAGS to stack */
pop eax /* Get EFLAGS from stack into eax */
mov ecx, eax /* Keep an unmodified copy of EFLAGS in ecx */
xor eax, 0x200000 /* Toggle the ID bit in EFLAGS (bit 21) */
push eax /* Put the modified EFLAGS value on the stack */
popfd /* Load the modified value into the EFLAGS register */
pushfd /* Read EFLAGS back via the stack ... */
pop eax /* ... into eax, to see whether the toggle stuck */
push ecx /* Push the original EFLAGS value */
popfd /* Restore the original EFLAGS */
xor eax, ecx /* Compare the read-back EFLAGS with the original */
jz NOT_SUPPORTED /* If identical, the ID bit could not be changed: */
/* CPUID is not available, so skip the CPUID */
/* queries and jump to NOT_SUPPORTED */
xor eax, eax /* eax = 0 selects the first CPUID query */
_asm _emit 0x0f /* CPUID instruction, emitted as its two raw opcode bytes */
_asm _emit 0xa2 /* NOTE(review): presumably because the assembler lacks the mnemonic - confirm */
cmp eax, 1 /* eax returned by CPUID must be at least 1 */
jl NOT_SUPPORTED /* eax < 1 (signed compare): treat MMX as unsupported */
xor eax, eax /* Set eax to zero ... */
inc eax /* ... then increment it to 1 for the second CPUID query. */
/* Original author's note: this pair was chosen as being */
/* faster than "mov eax, 1" (not verified here) */
_asm _emit 0x0f /* CPUID instruction (raw opcode bytes, as above) */
_asm _emit 0xa2
and edx, 0x00800000 /* Mask out all bits but the MMX bit (bit 23) */
cmp edx, 0 /* 0 = MMX not supported */
jz NOT_SUPPORTED /* non-zero = MMX IS supported, fall through */
mov mmx_supported_local, 1 /* Record success */
NOT_SUPPORTED:
mov eax, mmx_supported_local /* Copy the result to eax; the C return */
/* statement below also returns this variable */
pop edx /* Restore the registers saved at the top, in reverse order */
pop ecx
pop ebx
}
/* mmx_supported_local=0;  -- test code: force "MMX not supported" */
/* printf("MMX : %u (1=MMX supported)\n",mmx_supported_local);  -- debug output */
/* Cache the result in the file-scope flag, overwriting its initial value 2. */
mmx_supported = mmx_supported_local;
return mmx_supported_local;
}
/* Combines the row recently read in with the previous row.
This routine takes care of alpha and transparency if requested.
This routine also handles the two methods of progressive display
of interlaced images, depending on the mask value.
The mask value describes which pixels are to be combined with
the row. The pattern always repeats every 8 pixels, so just 8
bits are needed. A one indicates the pixel is to be combined; a
zero indicates the pixel is to be skipped. This is in addition
to any alpha or transparency value associated with the pixel. If
you want all pixels to be combined, pass 0xff (255) in mask. */
/* Use this routine for x86 platform - uses faster MMX routine if machine
supports MMX */
void /* PRIVATE */
png_combine_row(png_structp png_ptr, png_bytep row, int mask)
{
#ifdef PNG_USE_LOCAL_ARRAYS
const int png_pass_inc[7] = {8, 8, 4, 4, 2, 2, 1};
#endif
png_debug(1,"in png_combine_row_asm\n");
if (mmx_supported == 2) {
#if !defined(PNG_1_0_X)
/* this should have happened in png_init_mmx_flags() already */
png_warning(png_ptr, "asm_flags may not have been initialized");
#endif
png_mmx_support();
}
if (mask == 0xff)
{
png_memcpy(row, png_ptr->row_buf + 1,
(png_size_t)PNG_ROWBYTES(png_ptr->row_info.pixel_depth,
png_ptr->width));
}
/* GRR: add "else if (mask == 0)" case?
* or does png_combine_row() not even get called in that case? */
else
{
switch (png_ptr->row_info.pixel_depth)
{
case 1:
{
png_bytep sp;
png_bytep dp;
int s_inc, s_start, s_end;
int m;
int shift;
png_uint_32 i;
sp = png_ptr->row_buf + 1;
dp = row;
m = 0x80;
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
if (png_ptr->transformations & PNG_PACKSWAP)
{
s_start = 0;
s_end = 7;
s_inc = 1;
}
else
#endif
{
s_start = 7;
s_end = 0;
s_inc = -1;
}
shift = s_start;
for (i = 0; i < png_ptr->width; i++)
{
if (m & mask)
{
int value;
value = (*sp >> shift) & 0x1;
*dp &= (png_byte)((0x7f7f >> (7 - shift)) & 0xff);
*dp |= (png_byte)(value << shift);
}
if (shift == s_end)
{
shift = s_start;
sp++;
dp++;
}
else
shift += s_inc;
if (m == 1)
m = 0x80;
else
m >>= 1;
}
break;
}
case 2:
{
png_bytep sp;
png_bytep dp;
int s_start, s_end, s_inc;
int m;
int shift;
png_uint_32 i;
int value;
sp = png_ptr->row_buf + 1;
dp = row;
m = 0x80;
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
if (png_ptr->transformations & PNG_PACKSWAP)
{
s_start = 0;
s_end = 6;
s_inc = 2;
}
else
#endif
{
s_start = 6;
s_end = 0;
s_inc = -2;
}
shift = s_start;
for (i = 0; i < png_ptr->width; i++)
{
if (m & mask)
{
value = (*sp >> shift) & 0x3;
*dp &= (png_byte)((0x3f3f >> (6 - shift)) & 0xff);
*dp |= (png_byte)(value << shift);
}
if (shift == s_end)
{
shift = s_start;
sp++;
dp++;
}
else
shift += s_inc;
if (m == 1)
m = 0x80;
else
m >>= 1;
}
break;
}
case 4:
{
png_bytep sp;
png_bytep dp;
int s_start, s_end, s_inc;
int m;
int shift;
png_uint_32 i;
int value;
sp = png_ptr->row_buf + 1;
dp = row;
m = 0x80;
#if defined(PNG_READ_PACKSWAP_SUPPORTED)
if (png_ptr->transformations & PNG_PACKSWAP)
{
s_start = 0;
s_end = 4;
s_inc = 4;
}
else
#endif
{
s_start = 4;
s_end = 0;
s_inc = -4;
}
shift = s_start;
for (i = 0; i < png_ptr->width; i++)
{
if (m & mask)
{
value = (*sp >> shift) & 0xf;
*dp &= (png_byte)((0xf0f >> (4 - shift)) & 0xff);
*dp |= (png_byte)(value << shift);
}
if (shift == s_end)
{
shift = s_start;
sp++;
dp++;
}
else
shift += s_inc;
if (m == 1)
m = 0x80;
else
m >>= 1;
}
break;
}
case 8:
{
png_bytep srcptr;
png_bytep dstptr;
png_uint_32 len;
int m;
int diff, unmask;
__int64 mask0=0x0102040810204080;
#if !defined(PNG_1_0_X)
if ((png_ptr->asm_flags & PNG_ASM_FLAG_MMX_READ_COMBINE_ROW)
/* && mmx_supported */ )
#else
if (mmx_supported)
#endif
{
srcptr = png_ptr->row_buf + 1;
dstptr = row;
m = 0x80;
unmask = ~mask;
len = png_ptr->width &~7; /*reduce to multiple of 8 */
diff = png_ptr->width & 7; /*amount lost */
_asm
{
movd mm7, unmask /*load bit pattern */
psubb mm6,mm6 /*zero mm6 */
punpcklbw mm7,mm7
punpcklwd mm7,mm7
punpckldq mm7,mm7 /*fill register with 8 masks */
movq mm0,mask0
pand mm0,mm7 /*nonzero if keep byte */
pcmpeqb mm0,mm6 /*zeros->1s, v versa */
mov ecx,len /*load length of line (pixels) */
mov esi,srcptr /*load source */
mov ebx,dstptr /*load dest */
cmp ecx,0 /*lcr */
je mainloop8end
mainloop8:
movq mm4,[esi]
pand mm4,mm0
movq mm6,mm0
pandn mm6,[ebx]
por mm4,mm6
movq [ebx],mm4
add esi,8 /*inc by 8 bytes processed */
add ebx,8
sub ecx,8 /*dec by 8 pixels processed */
ja mainloop8
mainloop8end:
mov ecx,diff
cmp ecx,0
jz end8
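/* [Code-viewer page residue - not part of pngvcrd.c; the source listing is
 *  cut off at this point]
 *
 * Keyboard shortcuts:
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Switch theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */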