2013-10-01 45 views
0

排序比方說,我有以下的有序列表:合併有間隙

a = ['8EF5CD1B', 'B1392DB3', '59770CD6', 'BD23F32A', '4EFA3222'] 
b = ['8EF5CD1B', '96276D30', 'B1392DB3', '59770CD6', '4EFA3222'] 
c = ['96276D30', 'B1392DB3', 'BD23F32A', '59770CD6'] 

我希望他們通過合併從低優先級列表填補空白排序。

>>> from itertools import permutations 
>>> LISTS = (a, b, c) 
>>> for (first, second) in permutations(LISTS, 2): 
...  print((LISTS.index(first), LISTS.index(second)), magic(first, second)) 
... 
(0, 1) ['8EF5CD1B', '96276D30', 'B1392DB3', '59770CD6', 'BD23F32A', '4EFA3222'] 
(0, 2) ['8EF5CD1B', '96276D30', 'B1392DB3', '59770CD6', 'BD23F32A', '4EFA3222'] 
(1, 0) ['8EF5CD1B', '96276D30', 'B1392DB3', '59770CD6', 'BD23F32A', '4EFA3222'] 
(1, 2) ['8EF5CD1B', '96276D30', 'B1392DB3', 'BD23F32A', '59770CD6', '4EFA3222'] 
(2, 0) ['96276D30', '8EF5CD1B', 'B1392DB3', 'BD23F32A', '59770CD6', '4EFA3222'] 
(2, 1) ['8EF5CD1B', '96276D30', 'B1392DB3', 'BD23F32A', '59770CD6', '4EFA3222'] 
>>>magic(*LISTS) 
['8EF5CD1B', '96276D30', 'B1392DB3', '59770CD6', 'BD23F32A', '4EFA3222'] 

正如你可以看到(0,1)96276D30去,因爲那裏是由b名單有填補了一個空白的第二位。如果有訂單,優先順序是第一個參數。魔術功能應該與兩個以上的參數一起工作,就像上面的例子一樣。我編寫了一個可行的代碼,但對於這樣一個看起來很簡單的任務來說,它是醜陋的(可能太慢)。

MAX_ITERATIONS = 1000 
class UnjoinableListsError(Exception): pass 

def magic(*lists, iterations=MAX_ITERATIONS): 
    """ 
    Returns a joint sorted list of presorted lists (or tuples). 

    First it checks for common items, then it defines a gap list to put 
    non-commons in. Finally it mixes them all. If items of more presorted 
    list (or tuple) competes for a gap place, they will sorted in order 
    of their parents were in arguments. 
    """ 
    def sort_two(first, second): 
     commons = [item for item in first if item in second] 
     gap_list = [[] for i in range(len(commons)+1)] 
     for l in (first, second): 
      gap_item = [] 
      sliced = [] 
      for common_item in commons: 
       common_i = l.index(common_item) 
       sliced.append((list(l[:common_i]), list(l[common_i+1:]))) 
      gap_item.append(sliced[0][0]) 
      for j in range(len(sliced) - 1): 
       gap_item.append([item for item in sliced[j][1] 
            if item in sliced[j+1][0]]) 
      gap_item.append(sliced[-1][1]) 
      for j, item in enumerate(gap_item): 
       gap_list[j].extend([i for i in item if i not in commons]) 
     result = [] 
     result.extend(gap_list[0]) 
     for i in range(len(commons)): 
      result.append(commons[i]) 
      result.extend(gap_list[i+1]) 
     return result 

    result = lists[0] 
    index_set = {i for i in range(1, len(lists))} 
    it = iterations 
    while index_set and it > 0: 
     it -= 1 
     if it == 0: 
      raise UnjoinableListsError('The lists at argument index {}'+ 
       'are unjoinable.'.format(str(index_set))) 
     i = index_set.pop() 
     try: 
      result = sort_two(result, lists[i]) 
     except: 
      index_set.add(i) 
    return result 

有一些清晰而簡單的解決方案我錯過了什麼?感謝您的回答。

+0

_「比方說,我有以下的有序列表:」 _。你確定他們是排序?在列表a中,'59770CD6'在'BD23F32A'之前。在列表c中,'59770CD6'在'BD23F32A'之後。 – Kevin

+0

這些是對象引用的crc32哈希值。是的,他們是預先分類的。否則,我可以很容易地使用'heapq.merge()'。 – SzieberthAdam

回答

0

好吧,沒有答案讓我重做這件事。這裏是一個很好地工作代碼:

def joint_sorted(*sequences): 
    """Sorts two or more presorted sequences. The priority is in 
decreasing order for the case of unambiguous elem order. 

>>> joint_sorted([1,3,4,5,6], [1,2,4,6,7], [6, 10, 11], [12, 11, 17]) 
[1, 3, 2, 4, 5, 6, 7, 10, 12, 11, 17] 
>>> joint_sorted('adgth', 'dbgjhk') 
'adbgtjhk'""" 
    def for_two(first_seq, second_seq): 
     first_set, second_set = set(first_seq), set(second_seq) 
     if (len(first_seq) != len(first_set) 
      or len(second_seq) != len(second_set)): 
      raise TypeError("The sequences must contain " 
       "unique elems only!") 
     common_elems = first_set & second_set 
     before, buf = {}, [] 
     for i, e in iter(enumerate(second_seq)): 
      if e in common_elems: 
       before[e], buf = buf, [] 
      else: 
       buf.append(e) 
     result = [] 
     for e in first_seq: 
      if e in before: 
       result.extend(before[e]) 
      result.append(e) 
     result.extend(buf) 
     if isinstance(first_seq, str): 
      return ''.join(result) 
     return first_seq.__class__(result) 
    first_seq = sequences[0] 
    for i in range(1, len(sequences)): 
     first_seq = for_two(first_seq, sequences[i]) 
    return first_seq