2016-11-08 95 views
1

我想知道如何使用OrderedDict來實現基於大小的LRU。我正在努力的部分是當我打電話給__contains__時,移動鏈表的頭部。除了__contains__方法之外,以下實現正在工作。它導致無限遞歸。任何想法,我可以做到這一點?Python使用OrderedDict進行基於LRU大小的緩存

from collections import OrderedDict 

class Cache(OrderedDict): 
    def __init__(self, *args, **kwds): 
    self.size_limit = kwds.pop("size_limit", None) 
    OrderedDict.__init__(self, *args, **kwds) 
    self.val_sum = 0 
    self.hit = 0 
    self.num_evicted = 0 
    self.total_req = 0 
    self._check_size_limit() 

    def __contains__(self, key): 
    self.total_req += 1 
    if OrderedDict.__contains__(self, key): 
     self.hit += 1 
     value = OrderedDict.__getitem__ (self,key) 
     self.move_item_to_the_top(key, value) 
     return True 
    else: 
     return False 

    def move_item_to_the_top(self, key, value): 
    OrderedDict.__setitem__(self, key, value) 

    def __setitem__(self, key, value): 
    OrderedDict.__setitem__(self, key, value) 
    self.val_sum += value 
    self._check_size_limit() 

    def _check_size_limit(self): 
    if self.size_limit is not None: 
     while self.val_sum > self.size_limit: 
     key, value = self.popitem(last=False) 
     self.val_sum -= value 
     self.num_evicted += 1 

    def get_cache_size(self): 
    return self.val_sum 

    def get_number_evicted(self): 
    return self.num_evicted 

    def get_hit_ratio(self): 
    return 1.00 * self.hit/self.total_req 

    def get_total_req(self): 
    return self.total_req 

    def get_hits(self): 
    return self.hit 

這是我如何使用這個:

if __name__ == "__main__": 


    cache_size_B = 10 
    cache = Cache(size_limit=cache_size_B) 

    items = [(1,3), (2,3), (1,3), (3,4), (1,3), (5,5)] 


    for item in items: 

    cache_key = str(item[0]) 
    obj_size = item[1] 
    print item 

    if cache_key not in cache: 
     cache[cache_key] = int(obj_size) 

    print cache 
+1

不知道這是否是最好的方法,但不能彈出密鑰然後設置它? –

回答

2

運行你的代碼中,我得到以下錯誤:

python cache.py 
(1, 3) 
(2, 3) 
(1, 3) 
Traceback (most recent call last): 
    File "cache.py", line 68, in <module> 
    if cache_key not in cache: 
    File "cache.py", line 20, in __contains__ 
    self.move_item_to_the_top(key, value) 
    File "cache.py", line 26, in move_item_to_the_top 
    OrderedDict.__setitem__(self, key, value) 
    File "/usr/lib/python2.7/collections.py", line 75, in __setitem__ 
    if key not in self: 
    File "cache.py", line 20, in __contains__ 
    self.move_item_to_the_top(key, value) 
    File "cache.py", line 26, in move_item_to_the_top 
    OrderedDict.__setitem__(self, key, value) 
    File "/usr/lib/python2.7/collections.py", line 75, in __setitem__ 
    if key not in self: 

[...] 

    File "cache.py", line 26, in move_item_to_the_top 
    OrderedDict.__setitem__(self, key, value) 
    File "/usr/lib/python2.7/collections.py", line 75, in __setitem__ 
    if key not in self: 
    File "cache.py", line 20, in __contains__ 
    self.move_item_to_the_top(key, value) 
    File "cache.py", line 26, in move_item_to_the_top 
    OrderedDict.__setitem__(self, key, value) 
RuntimeError: maximum recursion depth exceeded in __instancecheck__ 

望着線75 collections.py它表明你的回調導致無限循環的Cache.__contains__

你可以重寫Cache類沒有overiding __contains__,而使用Cache.__getitem__跟蹤對高速緩存訪​​問:

from collections import OrderedDict 


class Cache(OrderedDict): 

    def __init__(self, *args, **kwds): 
     self.size_limit = kwds.pop("size_limit", None) 
     OrderedDict.__init__(self, *args, **kwds) 
     self.val_sum = 0 
     self.hit = 0 
     self.num_evicted = 0 
     self.total_req = 0 
     self._check_size_limit() 

    def move_item_to_the_top(self, key, value): 
     del self[key] 
     OrderedDict.__setitem__(self, key, value) 

    def __getitem__(self, key): 
     self.total_req += 1 
     try: 
      value = OrderedDict.__getitem__(self, key) 
     except KeyError: 
      raise 
     else: 
      self.hit += 1 
      self.move_item_to_the_top(key, value) 
      return value 

    def __setitem__(self, key, value): 
     OrderedDict.__setitem__(self, key, value) 
     self.val_sum += value 
     self._check_size_limit() 

    def _check_size_limit(self): 
     if self.size_limit is not None: 
      while self.val_sum > self.size_limit: 
       key, value = self.popitem(last=False) 
       self.val_sum -= value 
       self.num_evicted += 1 

    def get_cache_size(self): 
     return self.val_sum 

    def get_number_evicted(self): 
     return self.num_evicted 

    def get_hit_ratio(self): 
     return 1.00 * self.hit/self.total_req 

    def get_total_req(self): 
     return self.total_req 

    def get_hits(self): 
     return self.hit 


if __name__ == "__main__": 
    cache_size_B = 10 
    cache = Cache(size_limit=cache_size_B) 

    items = [(1,3), (2,3), (1,3), (3,4), (1,3), (5,5)] 

    for item in items: 

     cache_key = str(item[0]) 
     obj_size = item[1] 
     print item 

     try: 
      cache[cache_key] 
     except KeyError: 
      cache[cache_key] = int(obj_size) 

    print cache 

,您仍然可以使用foo not in cache但這不會算作一個小姐或打擊。如果要算任何訪問使用首選語法try/except [1],如:

if __name__ == "__main__": 
    cache_size_B = 10 
    cache = Cache(size_limit=cache_size_B) 

    items = [(1,3), (2,3), (1,3), (3,4), (1,3), (5,5)] 

    for item in items: 

     cache_key = str(item[0]) 
     obj_size = item[1] 
     print item 

     try: 
      cache[cache_key] 
     except KeyError: 
      cache[cache_key] = int(obj_size) 

    print cache 

[1]這是首選語法conditionaly做基於不是在一個項目是否存在等什麼列表或字典,因爲它只需要對容器的單一訪問。

+0

在'move_to_the_top'方法中,您需要刪除該項目(通過調用__delitem__'),否則它不會將其移動到頂部。不過,我仍然想知道是否有更優雅的方式來做到這一點。 – Amir

+1

我更新爲代碼。我認爲這是最好的方式。 – amirouche