以下是我根據您的文章所理解的做法。我寫了一個 Python 程序,用循環密鑰生成 XOR 加密流,並嘗試用歸一化的 Hamming 字符串距離來找出最可能的循環密鑰長度。我沒有把數據轉換成 base64,而是直接計算字符串(逐字符)距離,而不是二進制(逐位)距離。
#!/usr/bin/python
import sys
from itertools import cycle
def xor_file_with_cycling_strkey(filelocation, outfile, key):
    """XOR every byte of a file with a repeating key and write the result.

    Running the function a second time with the same key on the produced
    file restores the original content.

    filelocation -- path of the file to read.
    outfile      -- path of the file to create or overwrite.
    key          -- non-empty key; it is repeated cyclically over the input.
                    A text key is encoded as latin-1, so each of its
                    characters must have a code point below 256.

    Raises ValueError when key is empty: an empty key cannot be cycled and
    would otherwise silently produce an empty output file.
    """
    print(filelocation)
    if not key:
        raise ValueError('key must not be empty')
    # On Python 2 "bytes" is "str", so only genuine text keys get encoded.
    raw_key = key if isinstance(key, bytes) else key.encode('latin-1')
    # Binary mode on both sides: the ciphered stream is arbitrary bytes and
    # must not go through newline translation or text decoding.
    with open(filelocation, 'rb') as source:
        data = bytearray(source.read())
    ciphered = bytearray(c ^ k for c, k in zip(data, cycle(bytearray(raw_key))))
    # The input is fully read before the output is opened, and the result is
    # written in one call instead of one write per character.
    with open(outfile, 'wb') as target:
        target.write(bytes(ciphered))
# not used here, see compute_distance_char based on same idea.
def compute_distance(str1, str2, keysize):
    """Return the number of differing bits between two hexadecimal strings.

    str1, str2 -- hexadecimal text; embedded newlines are ignored.
    keysize    -- number of bytes each operand is expected to encode. Kept
                  for interface compatibility; the result does not depend
                  on it because the operands are compared as integers.

    Raises ValueError when an operand is not valid hexadecimal (this
    includes an operand that is empty once newlines are removed).
    """
    # Diagnostic trace of the raw operands, emitted before any clean-up.
    print('%s %s' % (str1, str2))
    value1 = int(str1.replace("\n", ""), 16)
    value2 = int(str2.replace("\n", ""), 16)
    # XOR sets a bit exactly where the operands differ. Comparing integers
    # keeps the bits aligned on the least significant end even when an
    # operand is wider than keysize bytes, which padded strings did not.
    return bin(value1 ^ value2).count('1')
# Prefer hamming_distance_bin, which is quicker.
def compute_distance_char(str1, str2, keysize):
    """Return the number of differing bits between two character strings.

    Newlines are removed from each operand first; the remaining characters
    are then compared pairwise by code point, stopping at the end of the
    shorter operand.

    str1, str2 -- character strings to compare.
    keysize    -- kept for interface compatibility; the result does not
                  depend on it. It used to set a per-character bit width,
                  which misaligned the comparison whenever a code point
                  needed more bits than that width.
    """
    chars1 = str1.replace("\n", "")
    chars2 = str2.replace("\n", "")
    # XOR of two code points has a 1 bit exactly where they differ, so the
    # popcount of each pair sums to the bit-level distance.
    return sum(bin(ord(a) ^ ord(b)).count('1') for a, b in zip(chars1, chars2))
def hamming_distance_str(str1, str2):
    """Count the positions at which two sequences hold different items.

    Items are compared pairwise; the comparison stops at the end of the
    shorter input, so surplus items of the longer one are not counted.
    """
    return sum(1 for left, right in zip(str1, str2) if left != right)
def hamming_distance_bin(str1, str2):
    """Count the differing bits between two character strings.

    Characters are paired position by position (stopping at the shorter
    input); for every pair that differs, the set bits of the XOR of their
    code points are added to the total.
    """
    return sum(
        # Popcount of the XOR = number of bits in which the pair differs.
        bin(ord(left) ^ ord(right)).count('1')
        for left, right in zip(str1, str2)
        if left != right
    )
def keysize_dist(filelocation):
    """Guess the length of a repeating XOR key from a ciphered file.

    For every candidate key size from 2 to 39 the first two consecutive
    blocks of keysize*2 characters are compared. The score is the number of
    differing characters (hamming_distance_str) divided by the key size;
    the candidate with the lowest score wins, the first one on ties.

    The bit-level distance is also computed in two ways
    (compute_distance_char and hamming_distance_bin) purely as a
    cross-check: a message is printed when they disagree, but neither
    value enters the score.

    filelocation -- path of the ciphered file; newlines in it are dropped.

    Returns a (keysize, normalized_distance) tuple, or (0, 40.0) when the
    file is too short to hold two full blocks for any candidate.
    """
    potential_keysize = 0
    # Sentinel above any reachable score: at most keysize*2 characters can
    # differ, so a normalized distance never exceeds 2.0.
    min_dist = 40.0
    # The context manager closes the file even if reading fails.
    with open(filelocation, 'r') as f:
        data = f.read().replace('\n', '')
    for keysize in range(2, 40):
        block_len = keysize * 2
        # Without two complete blocks the comparison would run on truncated
        # or empty slices and report a misleadingly small distance. Larger
        # key sizes need even more data, so stop here.
        if len(data) < block_len * 2:
            break
        first = data[:block_len]
        second = data[block_len:block_len * 2]
        # compute_distance (hex based) would need base16 input first, so it
        # is not used here.
        # proof that both functions compute same value :
        count_bin1 = compute_distance_char(first, second, keysize)
        count_bin2 = hamming_distance_bin(first, second)
        if count_bin1 != count_bin2:
            print('Discrepency between compute_distance_char->%i and hamming_distance_bin->%i' % (count_bin1, count_bin2))
        count = hamming_distance_str(first, second)
        normalized_distance = float(count) / keysize
        print('%s %f' % (keysize, normalized_distance))
        if normalized_distance < min_dist:
            potential_keysize = keysize
            min_dist = normalized_distance
    # we are more interested in keysize corresponding to minimal distance,
    # than in the minimal distance itself.
    return potential_keysize, min_dist
def main(args=sys.argv):
    """Cipher a file, decipher it again, then print the guessed key size.

    args -- argument vector: args[1] is the cleartext file, the optional
            args[2] is the key string.

    Writes <file>.ciphered and <file>.cleartext next to the input and
    prints the tuple returned by keysize_dist for the ciphered file.
    Returns 1 when no file argument is given, otherwise None.
    """
    if len(args) < 2:
        print('Please enter cleartext origin file to be ciphered then checked an optionaly a key string (max length 40)')
        return 1
    # on purpose default to key with a KEYSIZE char length 5.
    key = args[2] if len(args) > 2 else '12345'
    origin = args[1]
    ciphered_path = origin + '.ciphered'
    xor_file_with_cycling_strkey(origin, ciphered_path, key)
    # Second pass with the same key writes the deciphered copy.
    xor_file_with_cycling_strkey(ciphered_path, origin + '.cleartext', key)
    # raw non base64 encoded.
    print(keysize_dist(ciphered_path))
if __name__ == "__main__":
    # Hand main()'s result to the shell: 1 on a usage error, while None
    # (normal completion) is reported as exit status 0.
    sys.exit(main())
通過該代碼,您可以獲得完全解決問題所需的所有輸入。
./hamming_detect_xor_cycle.py 明文 123456789ABCDE ... (14, 1.7857142857142858)
它並不能正確檢測出所有的密鑰長度,但我認爲這是一種統計效應,取決於明文本身可能具有的週期性。正如您在主題中所說:使用更多的塊可以得到更好的結果。
如果工作正常,您爲什麼認爲自己沒有做到正確? – 2014-11-02 08:41:09
因爲最小歸一化距離對應的大小是 5,但我用 keysize 5 解碼並沒有成功…… – 2014-11-02 08:42:44
那麼,這就說明它並沒有正常工作…… – 2014-11-02 08:43:29