2
我想在DWORD數組中找到不等於0的最重要的DWORD。該算法應該針對高達128字節的數據進行優化。在DWORD數組中找到最重要的DWORD
我做了三個不同的函數,它們都返回特定DWORD的索引。
unsigned long msb_msvc(long* dw, std::intptr_t n)
{
while(--n)
{
if(dw[n])
break;
}
return n;
}
static inline unsigned long msb_386(long* dw, std::intptr_t n)
{
__asm
{
mov ecx, [dw]
mov eax, [n]
__loop: sub eax, 1
jz SHORT __exit
cmp DWORD PTR [ecx + eax * 4], 0
jz SHORT __loop
__exit:
}
}
static inline unsigned long msb_sse2(long* dw, std::intptr_t n)
{
__asm
{
mov ecx, [dw]
mov eax, [n]
test ecx, 0x0f
jnz SHORT __128_unaligned
__128_aligned:
cmp eax, 4
jb SHORT __64
sub eax, 4
movdqa xmm0, XMMWORD PTR [ecx + eax * 4]
pxor xmm1, xmm1
pcmpeqd xmm0, xmm1
pmovmskb edx, xmm0
not edx
and edx, 0xffff
jz SHORT __128_aligned
jmp SHORT __exit
__128_unaligned:
cmp eax, 4
jb SHORT __64
sub eax, 4
movdqu xmm0, XMMWORD PTR [ecx + eax * 4]
pxor xmm1, xmm1
pcmpeqd xmm0, xmm1
pmovmskb edx, xmm0
not edx
and edx, 0xffff
jz SHORT __128_unaligned
jmp SHORT __exit
__64:
cmp eax, 2
jb __32
sub eax, 2
movq mm0, MMWORD PTR [ecx + eax * 4]
pxor mm1, mm1
pcmpeqd mm0, mm1
pmovmskb edx, mm0
not edx
and edx, 0xff
emms
jz SHORT __64
jmp SHORT __exit
__32:
test eax, eax
jz SHORT __exit
xor eax, eax
jmp __leave ; retn
__exit:
bsr edx, edx
shr edx, 2
add eax, edx
__leave:
}
}
應該使用這些函數來預先選擇將要相互比較的數據。所以,它需要高性能。
有人知道更好的算法嗎?
看看BitScanReverse http://msdn.microsoft.com/en-US/library/fbxyd7zd(v=VS.80).aspx或__builtin_clz對於非MS? – FigmentEngine 2011-04-12 11:51:57
謝謝你的回答。首先,_BitScanRever和_BitScanForward返回最重要和最不重要的BIT(我想得到一個字節或雙字)。其次,這些函數使用bsf和bsr指令..沒有別的。 – 0xbadf00d 2011-04-12 12:49:54