; choose_table.nas
字號:
; new count bit routine
;       part of this code originates from
;       new GOGO-no-coda (1999, 2000)
;       Copyright (C) 1999 shigeo
;       modified by Keiichi SAKAI
;
; Syntax: NASM (Intel operand order).  Target: 32-bit x86 with MMX.
; The macros globaldef, externdef, segment_data, segment_code,
; PIC_OFFSETTABLE, PIC_BASE, PIC_EBP_REL, pmov and end are not defined in
; this file; presumably they come from nasm.h -- confirm against that header.

%include "nasm.h"

        globaldef       choose_table_MMX
        globaldef       MMX_masking     ; NOTE(review): no label named MMX_masking is defined in this file

        externdef       largetbl
        externdef       t1l
        externdef       table23
        externdef       table56

        segment_data
        align   16
D14_14_14_14    dd      0x000E000E, 0x000E000E  ; four words of 14 (threshold for pcmpgtw)
D15_15_15_15    dd      0xfff0fff0, 0xfff0fff0  ; four words of 0xfff0 (paddusw then saturates at 15+0xfff0)
mul_add         dd      0x00010010, 0x00010010  ; pmaddwd weights {16, 1}: index = x*16 + y
mul_add23       dd      0x00010003, 0x00010003  ; pmaddwd weights {3, 1}:  index = x*3 + y
mul_add56       dd      0x00010004, 0x00010004  ; pmaddwd weights {4, 1}:  index = x*4 + y

; 8-byte entries (two dwords each), indexed by x*16 + y in from3.
tableDEF:
        dd      0x00010003,0x01,0x00050005,0x05,0x00070006,0x07,0x00090008,0x08,0x000a0008,0x09
        dd      0x000a0009,0x0a,0x000b000a,0x0a,0x000b000a,0x0b,0x000c000a,0x0a,0x000c000b,0x0b
        dd      0x000c000b,0x0c,0x000d000c,0x0c,0x000d000c,0x0d,0x000d000c,0x0d,0x000e000d,0x0e
        dd      0x000b000e,0x0e,0x00040005,0x04,0x00060005,0x06,0x00080007,0x08,0x00090008,0x09
        dd      0x000a0009,0x0a,0x000b0009,0x0a,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a,0x0b
        dd      0x000c000b,0x0b,0x000c000b,0x0c,0x000d000c,0x0c,0x000e000c,0x0d,0x000d000c,0x0e
        dd      0x000e000d,0x0e,0x000b000d,0x0e,0x00070006,0x07,0x00080007,0x08,0x00090007,0x09
        dd      0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a,0x0c
        dd      0x000d000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000c,0x0d,0x000d000c,0x0d
        dd      0x000e000d,0x0e,0x000e000d,0x0f,0x000c000d,0x0f,0x00090007,0x08,0x00090008,0x09
        dd      0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a,0x0c
        dd      0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000c,0x0d,0x000e000c,0x0d
        dd      0x000e000c,0x0d,0x000f000d,0x0e,0x000f000d,0x0f,0x000d000d,0x0f,0x000a0008,0x09
        dd      0x000a0008,0x09,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a,0x0c
        dd      0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0c,0x000e000b,0x0d,0x000e000c,0x0d
        dd      0x000e000c,0x0e,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d,0x0f,0x000c000d,0x10
        dd      0x000a0009,0x0a,0x000a0009,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a,0x0c
        dd      0x000d000a,0x0c,0x000d000b,0x0d,0x000e000b,0x0d,0x000d000b,0x0d,0x000e000b,0x0d
        dd      0x000e000c,0x0e,0x000f000c,0x0d,0x000f000d,0x0f,0x000f000d,0x0f,0x0010000d,0x10
        dd      0x000d000e,0x10,0x000b000a,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a,0x0c
        dd      0x000d000a,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000e000b,0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0e,0x000f000d,0x0f,0x000f000d,0x0f
        dd      0x0010000e,0x10,0x000d000e,0x10,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a,0x0c
        dd      0x000c000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0e,0x000e000c,0x0e
        dd      0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0f,0x000f000c,0x0f,0x000f000d,0x0f
        dd      0x0011000d,0x10,0x0011000d,0x12,0x000d000e,0x12,0x000b000a,0x0a,0x000c000a,0x0a
        dd      0x000c000a,0x0b,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b,0x0d
        dd      0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000d,0x0f
        dd      0x0010000d,0x0f,0x0010000e,0x10,0x0010000e,0x11,0x000d000e,0x11,0x000c000a,0x0b
        dd      0x000c000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b,0x0d
        dd      0x000e000c,0x0d,0x000f000c,0x0f,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d,0x0f
        dd      0x0010000d,0x10,0x000f000d,0x10,0x0010000e,0x10,0x000f000e,0x12,0x000e000e,0x11
        dd      0x000c000b,0x0b,0x000d000b,0x0c,0x000c000b,0x0c,0x000d000b,0x0d,0x000e000c,0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0e,0x0010000d,0x0f
        dd      0x0010000d,0x10,0x0010000d,0x0f,0x0011000d,0x10,0x0011000e,0x11,0x0010000f,0x12
        dd      0x000d000e,0x13,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0e,0x0010000c,0x0e,0x0010000d,0x0f
        dd      0x0010000d,0x0f,0x0010000d,0x0f,0x0010000d,0x10,0x0010000e,0x11,0x000f000e,0x11
        dd      0x0010000e,0x11,0x000e000f,0x12,0x000d000c,0x0c,0x000e000c,0x0d,0x000e000b,0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0f,0x000f000d,0x0e,0x000f000d,0x0f
        dd      0x000f000d,0x10,0x0011000d,0x10,0x0010000d,0x11,0x0010000d,0x11,0x0010000e,0x11
        dd      0x0010000e,0x12,0x0012000f,0x12,0x000e000f,0x12,0x000f000c,0x0d,0x000e000c,0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0f,0x000f000d,0x0f,0x0010000d,0x10
        dd      0x0010000d,0x10,0x0010000d,0x10,0x0012000e,0x10,0x0011000e,0x10,0x0011000e,0x11
        dd      0x0011000e,0x12,0x0013000e,0x11,0x0011000f,0x12,0x000e000f,0x12,0x000e000d,0x0e
        dd      0x000f000d,0x0e,0x000d000d,0x0e,0x000e000d,0x0f,0x0010000d,0x0f,0x0010000d,0x0f
        dd      0x000f000d,0x11,0x0010000d,0x10,0x0010000e,0x10,0x0011000e,0x13,0x0012000e,0x11
        dd      0x0011000e,0x11,0x0013000f,0x11,0x0011000f,0x13,0x0010000e,0x12,0x000e000f,0x12
        dd      0x000b000d,0x0d,0x000b000d,0x0e,0x000b000d,0x0f,0x000c000d,0x10,0x000c000d,0x10
        dd      0x000d000d,0x10,0x000d000d,0x11,0x000d000e,0x10,0x000e000e,0x11,0x000e000e,0x11
        dd      0x000e000e,0x12,0x000e000e,0x12,0x000e000f,0x15,0x000e000f,0x14,0x000e000f,0x15
        dd      0x000c000f,0x12

; 8-byte entries (two dwords each), indexed by x*16 + y in from3.
; L_case_67 uses the table from its start, L_case_45 from entry 9.
tableABC:
        dd      0x00020004,0x1,0x00040004,0x4,0x00060006,0x7,0x00080008,0x9,0x00090009,0xa,0x000a000a,0xa
        dd      0x0009000a,0xa,0x000a000a,0xb,0x00000000,0x0,0x00020003,0x1,0x00040004,0x4,0x00070006,0x7
        dd      0x00090007,0x9,0x00090009,0x9,0x000a000a,0xa,0x00000000,0x0,0x00040004,0x4,0x00050005,0x6
        dd      0x00060006,0x8,0x00080007,0x9,0x000a0009,0xa,0x000a0009,0xb,0x0009000a,0xa,0x000a000a,0xa
        dd      0x00000000,0x0,0x00040004,0x4,0x00040005,0x6,0x00060006,0x8,0x000a0007,0x9,0x000a0008,0x9
        dd      0x000a000a,0xa,0x00000000,0x0,0x00060006,0x7,0x00070006,0x8,0x00080007,0x9,0x00090008,0xa
        dd      0x000a0009,0xb,0x000b000a,0xc,0x000a0009,0xb,0x000a000a,0xb,0x00000000,0x0,0x00070005,0x7
        dd      0x00060006,0x7,0x00080007,0x9,0x000a0008,0xa,0x000a0009,0xa,0x000b000a,0xb,0x00000000,0x0
        dd      0x00080007,0x8,0x00080007,0x9,0x00090008,0xa,0x000b0008,0xb,0x000a0009,0xc,0x000c000a,0xc
        dd      0x000a000a,0xb,0x000b000a,0xc,0x00000000,0x0,0x00090007,0x8,0x000a0007,0x9,0x000a0008,0xa
        dd      0x000b0009,0xb,0x000b0009,0xb,0x000c000a,0xb,0x00000000,0x0,0x00090008,0x9,0x000a0008,0xa
        dd      0x000a0009,0xb,0x000b0009,0xc,0x000b000a,0xc,0x000c000a,0xc,0x000b000a,0xc,0x000c000b,0xc
        dd      0x00000000,0x0,0x00090008,0x8,0x00090008,0x9,0x000a0009,0xa,0x000b0009,0xb,0x000c000a,0xb
        dd      0x000c000b,0xc,0x00000000,0x0,0x00090009,0xa,0x000a0009,0xb,0x000b000a,0xc,0x000c000a,0xc
        dd      0x000c000a,0xd,0x000d000b,0xd,0x000c000a,0xc,0x000d000b,0xd,0x00000000,0x0,0x000a0009,0x9
        dd      0x000a0009,0xa,0x000b000a,0xb,0x000b000a,0xc,0x000d000b,0xc,0x000d000b,0xc,0x00000000,0x0
        dd      0x00090009,0x9,0x00090009,0xa,0x00090009,0xb,0x000a000a,0xc,0x000b000a,0xc,0x000c000b,0xc
        dd      0x000c000b,0xd,0x000c000c,0xd,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
        dd      0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x0009000a,0xa,0x0009000a,0xa
        dd      0x000a000a,0xb,0x000b000b,0xc,0x000c000b,0xc,0x000c000b,0xd,0x000c000b,0xd,0x000c000c,0xd
        dd      0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
        dd      0x0,0x00000000, 0x0,0x00000000

; 8-byte entries indexed by bsr(max-15); multiplied (pmaddwd) with the
; per-lane "greater than 14" counts in the ESC path.
linbits32:
        dd      0x00040004,0x10001,0x00040004,0x20002,0x00040004,0x30003,0x00040004,0x40004
        dd      0x00050005,0x60006,0x00060006,0x60006,0x00070007,0x80008,0x00080008,0x80008
        dd      0x00090009,0xa000a,0x000b000b,0xa000a,0x000b000b,0xd000d,0x000d000d,0xd000d
        dd      0x000d000d,0xd000d

; Word entries indexed by bsr(max-15); the ESC path returns either the low
; byte or the high byte of the selected entry.
choose_table_H:
        dw      0x1810, 0x1811, 0x1812, 0x1813, 0x1914, 0x1a14, 0x1b15, 0x1c15
        dw      0x1d16, 0x1e16, 0x1e17, 0x1f17, 0x1f17

; Jump table indexed by the maximum value (0..15).  Entries are offsets
; relative to choose_table_MMX; the dispatcher adds the run-time address of
; choose_table_MMX before jumping.
choose_jump_table_L:
        dd      table_MMX.L_case_0 - choose_table_MMX
        dd      table_MMX.L_case_1 - choose_table_MMX
        dd      table_MMX.L_case_2 - choose_table_MMX
        dd      table_MMX.L_case_3 - choose_table_MMX
        dd      table_MMX.L_case_45 - choose_table_MMX
        dd      table_MMX.L_case_45 - choose_table_MMX
        dd      table_MMX.L_case_67 - choose_table_MMX
        dd      table_MMX.L_case_67 - choose_table_MMX
        dd      table_MMX.L_case_8_15 - choose_table_MMX
        dd      table_MMX.L_case_8_15 - choose_table_MMX
        dd      table_MMX.L_case_8_15 - choose_table_MMX
        dd      table_MMX.L_case_8_15 - choose_table_MMX
        dd      table_MMX.L_case_8_15 - choose_table_MMX
        dd      table_MMX.L_case_8_15 - choose_table_MMX
        dd      table_MMX.L_case_8_15 - choose_table_MMX
        dd      table_MMX.L_case_8_15 - choose_table_MMX

        segment_code
;
; use MMX
;

PIC_OFFSETTABLE

        align   16
;-----------------------------------------------------------------------
; int choose_table(int *ix, int *end, int *s)
;
; In:    stack arguments; at entry [esp+4] = ix, [esp+8] = end, [esp+12] = s.
;        The loops consume 8 bytes (two ints) per step from ix up to end.
;        NOTE(review): presumably (end - ix) must be a non-zero multiple of
;        8 bytes -- confirm against the callers.
; Out:   eax = selected table number.
;        max == 0          : returns 0, *s untouched.
;        max > 8191+15     : max is stored to *s and -1 is returned.
;        otherwise         : the smallest candidate sum is added to *s.
; Saves: ebp, ebx, esi and edi are pushed/popped around their use.
; Clobb: ecx, edx, mm0-mm7, flags.  emms is executed before every return.
;-----------------------------------------------------------------------
choose_table_MMX:
        push    ebp
        call    get_pc.bp
        add     ebp, PIC_BASE()

        mov     ecx, [esp+8]            ; ecx = begin
        mov     edx, [esp+12]           ; edx = end
        sub     ecx, edx                ; ecx = begin-end (should be negative)

        ; --- pass 1: find the maximum value -------------------------------
        test    ecx, 8                  ; odd number of 8-byte pairs?
        pxor    mm0, mm0                ; mm0 = [0:0]
        movq    mm1, [edx+ecx]
        jz      .lp                     ; flags still from "test" (MMX ops leave EFLAGS alone)

        add     ecx, 8
        jz      .exit

        align   4
.lp:
        movq    mm4, [edx+ecx]
        movq    mm5, [edx+ecx+8]
        add     ecx, 16
        psubusw mm4, mm0                ; strictly this ought to be a dword operation,
        psubusw mm5, mm1                ; but there is no such instruction :-p
        paddw   mm0, mm4                ; the values handled here are at most 8191+15, so word ops are fine
        paddw   mm1, mm5                ; psubusw + paddw == unsigned max per word
        jnz     .lp                     ; flags from "add ecx, 16"
.exit:
        psubusw mm1, mm0                ; this, too, ought to be a dword operation
        paddw   mm0, mm1                ; mm0 = max(mm0, mm1)

        movq    mm4, mm0
        punpckhdq       mm4, mm4        ; mm4 = [hi:hi]
        psubusw mm4, mm0                ; this, too, ought to be a dword operation
        paddw   mm0, mm4                ; low dword of mm0 = overall maximum
        movd    eax, mm0

        cmp     eax, 15
        ja      .with_ESC
        lea     ecx, [PIC_EBP_REL(choose_table_MMX)]
        add     ecx, [PIC_EBP_REL(choose_jump_table_L+eax*4)]
        jmp     ecx

.with_ESC1:
        ; maximum is larger than 8191+15: report it through *s and return -1
        emms
        mov     ecx, [esp+16]           ; *s
        mov     [ecx], eax
        or      eax, -1
        pop     ebp
        ret

.with_ESC:
        cmp     eax, 8191+15
        ja      .with_ESC1

        sub     eax, 15
        push    ebx
        push    esi
        bsr     eax, eax                ; eax = index into linbits32 / choose_table_H
%assign _P 4*2                          ; bytes pushed since "push ebp" (ebx, esi)
        movq    mm5, [PIC_EBP_REL(D15_15_15_15)]
        movq    mm6, [PIC_EBP_REL(D14_14_14_14)]
        movq    mm3, [PIC_EBP_REL(mul_add)]

        mov     ecx, [esp+_P+8]         ; = ix
;       mov     edx, [esp+_P+12]        ; = end  (edx still holds end from above)
        sub     ecx, edx

        xor     esi, esi                ; sum = 0
        test    ecx, 8
        pxor    mm7, mm7                ; linbits_sum: number of values exceeding 14
        jz      .H_dual_lp1

        ; odd leading pair
        movq    mm0, [edx+ecx]
        add     ecx, 8
        packssdw        mm0, mm7
        movq    mm2, mm0
        paddusw mm0, mm5                ; mm0 = min(ix, 15)+0xfff0
        pcmpgtw mm2, mm6                ; greater than 14?
        psubw   mm7, mm2                ; if greater than 14: linbits_sum++
        pmaddwd mm0, mm3                ; {0, 0, y, x}*{1, 16, 1, 16}
        movd    ebx, mm0
        ; as signed words the lanes are min(ix,15)-16, hence the (16*16+16) bias
        mov     esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
        jz      .H_dual_exit            ; flags from "add ecx, 8"

        align   4
.H_dual_lp1:
        movq    mm0, [edx+ecx]
        movq    mm1, [edx+ecx+8]
        packssdw        mm0, mm1
        movq    mm2, mm0
        paddusw mm0, mm5                ; mm0 = min(ix, 15)+0xfff0
        pcmpgtw mm2, mm6                ; greater than 14?
        pmaddwd mm0, mm3                ; {y, x, y, x}*{1, 16, 1, 16}
        movd    ebx, mm0
        punpckhdq       mm0, mm0
        add     esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
        movd    ebx, mm0
        add     esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
        add     ecx, 16
        psubw   mm7, mm2                ; if greater than 14: linbits_sum++
        jnz     .H_dual_lp1             ; flags from "add ecx, 16"

.H_dual_exit:
        pmov    mm1, mm7                ; NOTE(review): pmov is not an x86 mnemonic; presumably a nasm.h alias -- confirm
        punpckhdq       mm7, mm7
        paddd   mm7, mm1
        punpckldq       mm7, mm7
        pmaddwd mm7, [PIC_EBP_REL(linbits32+eax*8)]     ; linbits
        mov     ax, [PIC_EBP_REL(choose_table_H+eax*2)] ; eax < 16 here, so upper half of eax is zero

        movd    ecx, mm7
        punpckhdq       mm7, mm7
        movd    edx, mm7
        emms
        shl     edx, 16
        add     ecx, edx

        add     ecx, esi

        pop     esi
        pop     ebx

        mov     edx, ecx
        and     ecx, 0xffff             ; ecx = sum2
        shr     edx, 16                 ; edx = sum

        cmp     edx, ecx
        jle     .chooseE_s1
        mov     edx, ecx                ; sum2 is smaller: use it and the high byte of the table entry
        shr     eax, 8
.chooseE_s1:
        mov     ecx, [esp+16]           ; *s
        and     eax, 0xff
        add     [ecx], edx
        pop     ebp
        ret

; max == 0: eax is already 0, nothing is added to *s.
table_MMX.L_case_0:
        emms
        pop     ebp
        ret

; max == 1: add one byte from t1l per pair (index = 2*ix[0] + ix[1]); returns 1.
table_MMX.L_case_1:
        emms
        mov     eax, [esp+16]           ; *s
        mov     ecx, [esp+8]            ; *ix
        sub     ecx, edx                ; edx still holds end
        push    ebx
.lp:
        mov     ebx, [edx+ecx]
        add     ebx, ebx
        add     ebx, [edx+ecx+4]
        movzx   ebx, byte [PIC_EBP_REL(ebx+t1l)]
        add     [eax], ebx
        add     ecx, 8
        jnz     .lp
        pop     ebx
        mov     eax, 1
        pop     ebp
        ret

; max in 4..15: three candidate tables are evaluated at once by from3.
; The pushed dword is the base table number; ecx points at the lookup table.
table_MMX.L_case_45:
        push    dword 7
        lea     ecx, [PIC_EBP_REL(tableABC+9*8)]
        jmp     from3

table_MMX.L_case_67:
        push    dword 10
        lea     ecx, [PIC_EBP_REL(tableABC)]
        jmp     from3

table_MMX.L_case_8_15:
        push    dword 13
        lea     ecx, [PIC_EBP_REL(tableDEF)]
        ; falls through into from3

; In:  ecx = table base, edx = end, [esp] = base table number, [esp+4] = saved ebp.
; Out: eax = base + 0/1/2 (index of the smallest of the three sums); that sum is added to *s.
from3:
        mov     eax, [esp+12]           ; eax = *begin
;       mov     edx, [esp+16]           ; edx = *end  (edx still holds end)
        push    ebx
        sub     eax, edx
        movq    mm5, [PIC_EBP_REL(mul_add)]
        pxor    mm2, mm2                ; mm2 = sum
        test    eax, 8
        jz      .choose3_lp1
; odd length
        movq    mm0, [edx+eax]          ; mm0 = ix[0] | ix[1]
        add     eax, 8
        packssdw        mm0, mm2
        pmaddwd mm0, mm5
        movd    ebx, mm0
        movq    mm2, [ecx+ebx*8]
        jz      .choose3_exit           ; flags from "add eax, 8"

        align   4
.choose3_lp1:
        movq    mm0, [edx+eax]
        movq    mm1, [edx+eax+8]
        add     eax, 16
        packssdw        mm0, mm1        ; mm0 = ix[0]|ix[1]|ix[2]|ix[3]
        pmaddwd mm0, mm5
        movd    ebx, mm0
        punpckhdq       mm0, mm0
        paddd   mm2, [ecx+ebx*8]
        movd    ebx, mm0
        paddd   mm2, [ecx+ebx*8]
        jnz     .choose3_lp1            ; flags from "add eax, 16"
.choose3_exit:
;       xor     eax, eax                ; not needed: eax has counted up to 0 here
        movd    ebx, mm2
        punpckhdq       mm2, mm2
        mov     ecx, ebx
        and     ecx, 0xffff             ; ecx = sum2
        shr     ebx, 16                 ; ebx = sum1
        movd    edx, mm2                ; edx = sum
        cmp     edx, ebx
        jle     .choose3_s1
        mov     edx, ebx
        inc     eax
.choose3_s1:
        emms
        pop     ebx
        cmp     edx, ecx
        jle     .choose3_s2
        mov     edx, ecx
        mov     eax, 2
.choose3_s2:
        pop     ecx                     ; base table number pushed by the L_case_* entry
        add     eax, ecx
        mov     ecx, [esp+16]           ; *s
        add     [ecx], edx
        pop     ebp
        ret

; max == 2 or 3: two candidate tables are evaluated at once by from2.
; The pushed dword is the base table number; ecx = lookup table, mm5 = index weights.
table_MMX.L_case_2:
        push    dword 2
        lea     ecx, [PIC_EBP_REL(table23)]
        pmov    mm5, [PIC_EBP_REL(mul_add23)]
        jmp     from2

table_MMX.L_case_3:
        push    dword 5
        lea     ecx, [PIC_EBP_REL(table56)]
        pmov    mm5, [PIC_EBP_REL(mul_add56)]
        ; falls through into from2

; In:  ecx = table base, mm5 = pmaddwd weights, edx = end,
;      [esp] = base table number, [esp+4] = saved ebp.
; Out: eax = base + 0/1 (index of the smaller of the two sums); that sum is added to *s.
from2:
        mov     eax, [esp+12]           ; eax = *begin
;       mov     edx, [esp+16]           ; edx = *end  (edx still holds end)
        push    ebx
        push    edi
        sub     eax, edx
        xor     edi, edi
        test    eax, 8
        jz      .choose2_lp1
; odd length
        movq    mm0, [edx+eax]          ; mm0 = ix[0] | ix[1]
        pxor    mm2, mm2                ; mm2 = sum
        packssdw        mm0, mm2
        pmaddwd mm0, mm5
        movd    ebx, mm0
        mov     edi, [ecx+ebx*4]
        add     eax, 8
        jz      .choose2_exit

        align   4
.choose2_lp1:
        movq    mm0, [edx+eax]
        movq    mm1, [edx+eax+8]
        packssdw        mm0, mm1        ; mm0 = ix[0]|ix[1]|ix[2]|ix[3]
        pmaddwd mm0, mm5
        movd    ebx, mm0
        punpckhdq       mm0, mm0
        add     edi, [ecx+ebx*4]
        movd    ebx, mm0
        add     edi, [ecx+ebx*4]
        add     eax, 16
        jnc     .choose2_lp1            ; carry is set only when the negative offset wraps to 0
.choose2_exit:
        mov     ecx, edi
        pop     edi
        pop     ebx
        pop     eax                     ; table num.
        emms
        mov     edx, ecx
        and     ecx, 0xffff             ; ecx = sum2
        shr     edx, 16                 ; edx = sum1
        cmp     edx, ecx
        jle     .choose2_s1
        mov     edx, ecx
        inc     eax
.choose2_s1:
        mov     ecx, [esp+16]           ; *s
        add     [ecx], edx
        pop     ebp
        ret

        end
?? 快捷鍵說明
復制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號
Ctrl + =
減小字號
Ctrl + -