| 1 |
|
|---|
| 2 |
|
|---|
| 3 |
|
|---|
| 4 |
|
|---|
| 5 |
|
|---|
| 6 |
|
|---|
| 7 |
|
|---|
| 8 |
"""a simple LRU (Least-Recently-Used) cache module |
|---|
| 9 |
|
|---|
| 10 |
This module provides very simple LRU (Least-Recently-Used) cache |
|---|
| 11 |
functionality. |
|---|
| 12 |
|
|---|
| 13 |
An *in-memory cache* is useful for storing the results of an |
|---|
| 14 |
'expensive' process (one that takes a lot of time or resources) for |
|---|
| 15 |
later re-use. Typical examples are accessing data from the filesystem, |
|---|
| 16 |
a database, or a network location. If you know you'll need to re-read |
|---|
| 17 |
the data again, it can help to keep it in a cache. |
|---|
| 18 |
|
|---|
| 19 |
You *can* use a Python dictionary as a cache for some purposes. |
|---|
| 20 |
However, if the results you're caching are large, or you have a lot of |
|---|
| 21 |
possible results, this can be impractical memory-wise. |
|---|
| 22 |
|
|---|
| 23 |
An *LRU cache*, on the other hand, only keeps _some_ of the results in |
|---|
| 24 |
memory, which keeps you from overusing resources. The cache is bounded |
|---|
| 25 |
by a maximum size; if you try to add more values to the cache, it will |
|---|
| 26 |
automatically discard the values that you haven't read or written to |
|---|
| 27 |
in the longest time. In other words, the least-recently-used items are |
|---|
| 28 |
discarded. [1]_ |
|---|
| 29 |
|
|---|
| 30 |
.. [1]: 'Discarded' here means 'removed from the cache'. |
|---|
| 31 |
|
|---|
| 32 |
""" |
|---|
| 33 |
|
|---|
| 34 |
from __future__ import generators |
|---|
| 35 |
import time |
|---|
| 36 |
from heapq import heappush, heappop, heapify |
|---|
| 37 |
|
|---|
| 38 |
__version__ = "0.2" |
|---|
| 39 |
__all__ = ['CacheKeyError', 'LRUCache', 'DEFAULT_SIZE'] |
|---|
| 40 |
__docformat__ = 'reStructuredText en' |
|---|
| 41 |
|
|---|
| 42 |
DEFAULT_SIZE = 16 |
|---|
| 43 |
"""Default size of a new LRUCache object, if no 'size' argument is given.""" |
|---|
| 44 |
|
|---|
| 45 |
class CacheKeyError(KeyError): |
|---|
| 46 |
"""Error raised when cache requests fail |
|---|
| 47 |
|
|---|
| 48 |
When a cache record is accessed which no longer exists (or never did), |
|---|
| 49 |
this error is raised. To avoid it, you may want to check for the existence |
|---|
| 50 |
of a cache record before reading or deleting it.""" |
|---|
| 51 |
pass |
|---|
| 52 |
|
|---|
| 53 |
class LRUCache(object): |
|---|
| 54 |
"""Least-Recently-Used (LRU) cache. |
|---|
| 55 |
|
|---|
| 56 |
Instances of this class provide a least-recently-used (LRU) cache. They |
|---|
| 57 |
emulate a Python mapping type. You can use an LRU cache more or less like |
|---|
| 58 |
a Python dictionary, with the exception that objects you put into the |
|---|
| 59 |
cache may be discarded before you take them out. |
|---|
| 60 |
|
|---|
| 61 |
Some example usage:: |
|---|
| 62 |
|
|---|
| 63 |
cache = LRUCache(32) # new cache |
|---|
| 64 |
cache['foo'] = get_file_contents('foo') # or whatever |
|---|
| 65 |
|
|---|
| 66 |
if 'foo' in cache: # if it's still in cache... |
|---|
| 67 |
# use cached version |
|---|
| 68 |
contents = cache['foo'] |
|---|
| 69 |
else: |
|---|
| 70 |
# recalculate |
|---|
| 71 |
contents = get_file_contents('foo') |
|---|
| 72 |
# store in cache for next time |
|---|
| 73 |
cache['foo'] = contents |
|---|
| 74 |
|
|---|
| 75 |
print cache.size # Maximum size |
|---|
| 76 |
|
|---|
| 77 |
print len(cache) # 0 <= len(cache) <= cache.size |
|---|
| 78 |
|
|---|
| 79 |
cache.size = 10 # Auto-shrink on size assignment |
|---|
| 80 |
|
|---|
| 81 |
for i in range(50): # note: larger than cache size |
|---|
| 82 |
cache[i] = i |
|---|
| 83 |
|
|---|
| 84 |
if 0 not in cache: print 'Zero was discarded.' |
|---|
| 85 |
|
|---|
| 86 |
if 42 in cache: |
|---|
| 87 |
del cache[42] # Manual deletion |
|---|
| 88 |
|
|---|
| 89 |
for j in cache: # iterate (in LRU order) |
|---|
| 90 |
print j, cache[j] # iterator produces keys, not values |
|---|
| 91 |
""" |
|---|
| 92 |
|
|---|
| 93 |
class __Node(object): |
|---|
| 94 |
"""Record of a cached value. Not for public consumption.""" |
|---|
| 95 |
|
|---|
| 96 |
def __init__(self, key, obj, timestamp): |
|---|
| 97 |
object.__init__(self) |
|---|
| 98 |
self.key = key |
|---|
| 99 |
self.obj = obj |
|---|
| 100 |
self.atime = timestamp |
|---|
| 101 |
self.mtime = self.atime |
|---|
| 102 |
|
|---|
| 103 |
def __cmp__(self, other): |
|---|
| 104 |
return cmp(self.atime, other.atime) |
|---|
| 105 |
|
|---|
| 106 |
def __repr__(self): |
|---|
| 107 |
return "<%s %s => %s (%s)>" % \ |
|---|
| 108 |
(self.__class__, self.key, self.obj, \ |
|---|
| 109 |
time.asctime(time.localtime(self.atime))) |
|---|
| 110 |
|
|---|
| 111 |
def __init__(self, size=DEFAULT_SIZE): |
|---|
| 112 |
|
|---|
| 113 |
if size <= 0: |
|---|
| 114 |
raise ValueError, size |
|---|
| 115 |
elif type(size) is not type(0): |
|---|
| 116 |
raise TypeError, size |
|---|
| 117 |
object.__init__(self) |
|---|
| 118 |
self.__heap = [] |
|---|
| 119 |
self.__dict = {} |
|---|
| 120 |
self.size = size |
|---|
| 121 |
"""Maximum size of the cache. |
|---|
| 122 |
If more than 'size' elements are added to the cache, |
|---|
| 123 |
the least-recently-used ones will be discarded.""" |
|---|
| 124 |
|
|---|
| 125 |
def __len__(self): |
|---|
| 126 |
return len(self.__heap) |
|---|
| 127 |
|
|---|
| 128 |
def __contains__(self, key): |
|---|
| 129 |
return self.__dict.has_key(key) |
|---|
| 130 |
|
|---|
| 131 |
def __setitem__(self, key, obj): |
|---|
| 132 |
if self.__dict.has_key(key): |
|---|
| 133 |
node = self.__dict[key] |
|---|
| 134 |
node.obj = obj |
|---|
| 135 |
node.atime = time.time() |
|---|
| 136 |
node.mtime = node.atime |
|---|
| 137 |
heapify(self.__heap) |
|---|
| 138 |
else: |
|---|
| 139 |
|
|---|
| 140 |
while len(self.__heap) >= self.size: |
|---|
| 141 |
lru = heappop(self.__heap) |
|---|
| 142 |
del self.__dict[lru.key] |
|---|
| 143 |
node = self.__Node(key, obj, time.time()) |
|---|
| 144 |
self.__dict[key] = node |
|---|
| 145 |
heappush(self.__heap, node) |
|---|
| 146 |
|
|---|
| 147 |
def __getitem__(self, key): |
|---|
| 148 |
if not self.__dict.has_key(key): |
|---|
| 149 |
raise CacheKeyError(key) |
|---|
| 150 |
else: |
|---|
| 151 |
node = self.__dict[key] |
|---|
| 152 |
node.atime = time.time() |
|---|
| 153 |
heapify(self.__heap) |
|---|
| 154 |
return node.obj |
|---|
| 155 |
|
|---|
| 156 |
def __delitem__(self, key): |
|---|
| 157 |
if not self.__dict.has_key(key): |
|---|
| 158 |
raise CacheKeyError(key) |
|---|
| 159 |
else: |
|---|
| 160 |
node = self.__dict[key] |
|---|
| 161 |
del self.__dict[key] |
|---|
| 162 |
self.__heap.remove(node) |
|---|
| 163 |
heapify(self.__heap) |
|---|
| 164 |
return node.obj |
|---|
| 165 |
|
|---|
| 166 |
def __iter__(self): |
|---|
| 167 |
copy = self.__heap[:] |
|---|
| 168 |
while len(copy) > 0: |
|---|
| 169 |
node = heappop(copy) |
|---|
| 170 |
yield node.key |
|---|
| 171 |
raise StopIteration |
|---|
| 172 |
|
|---|
| 173 |
def __setattr__(self, name, value): |
|---|
| 174 |
object.__setattr__(self, name, value) |
|---|
| 175 |
|
|---|
| 176 |
if name == 'size': |
|---|
| 177 |
while len(self.__heap) > value: |
|---|
| 178 |
lru = heappop(self.__heap) |
|---|
| 179 |
del self.__dict[lru.key] |
|---|
| 180 |
|
|---|
| 181 |
def __repr__(self): |
|---|
| 182 |
return "<%s (%d elements)>" % (str(self.__class__), len(self.__heap)) |
|---|
| 183 |
|
|---|
| 184 |
def mtime(self, key): |
|---|
| 185 |
"""Return the last modification time for the cache record with key. |
|---|
| 186 |
May be useful for cache instances where the stored values can get |
|---|
| 187 |
'stale', such as caching file or network resource contents.""" |
|---|
| 188 |
if not self.__dict.has_key(key): |
|---|
| 189 |
raise CacheKeyError(key) |
|---|
| 190 |
else: |
|---|
| 191 |
node = self.__dict[key] |
|---|
| 192 |
return node.mtime |
|---|
| 193 |
|
|---|
| 194 |
if __name__ == "__main__": |
|---|
| 195 |
cache = LRUCache(25) |
|---|
| 196 |
print cache |
|---|
| 197 |
for i in range(50): |
|---|
| 198 |
cache[i] = str(i) |
|---|
| 199 |
print cache |
|---|
| 200 |
if 46 in cache: |
|---|
| 201 |
del cache[46] |
|---|
| 202 |
print cache |
|---|
| 203 |
cache.size = 10 |
|---|
| 204 |
print cache |
|---|
| 205 |
cache[46] = '46' |
|---|
| 206 |
print cache |
|---|
| 207 |
print len(cache) |
|---|
| 208 |
for c in cache: |
|---|
| 209 |
print c |
|---|
| 210 |
print cache |
|---|
| 211 |
print cache.mtime(46) |
|---|
| 212 |
for c in cache: |
|---|
| 213 |
print c |
|---|