1 patch for repository davidsarah@dev.allmydata.org:/home/darcs/tahoe/trunk:

Wed Oct 27 05:33:02 GMT Daylight Time 2010  david-sarah@jacaranda.org
  * make ResponseCache smarter to avoid memory leaks: don't record timestamps, use DataSpans to merge entries, and clear the cache when we see a new seqnum. refs #1045, #1229

New patches:

[make ResponseCache smarter to avoid memory leaks: don't record timestamps, use DataSpans to merge entries, and clear the cache when we see a new seqnum. refs #1045, #1229
david-sarah@jacaranda.org**20101027043302
 Ignore-this: 88fd6fba7f35a2f8af1693b92718f5f3
] {
hunk ./src/allmydata/mutable/common.py 3
 from allmydata.util import idlib
-from allmydata.util.dictutil import DictOfSets
+from allmydata.util.spans import DataSpans

 MODE_CHECK = "MODE_CHECK" # query all peers
 MODE_ANYTHING = "MODE_ANYTHING" # one recoverable version
hunk ./src/allmydata/mutable/common.py 62
 class ResponseCache:
     """I cache share data, to reduce the number of round trips used during
     mutable file operations. All of the data in my cache is for a single
-    storage index, but I will keep information on multiple shares (and
-    multiple versions) for that storage index.
+    storage index, but I will keep information on multiple shares for
+    that storage index.
+
+    I maintain a highest-seen sequence number, and will flush all entries
+    each time this number increases (this doesn't necessarily imply that
+    all entries have the same sequence number).

     My cache is indexed by a (verinfo, shnum) tuple.
hunk ./src/allmydata/mutable/common.py 71
-    My cache entries contain a set of non-overlapping byteranges: (start,
-    data, timestamp) tuples.
+    My cache entries are DataSpans instances, each representing a set of
+    non-overlapping byteranges.
     """

     def __init__(self):
hunk ./src/allmydata/mutable/common.py 76
-        self.cache = DictOfSets()
+        self.cache = {}
+        self.seqnum = None

     def _clear(self):
hunk ./src/allmydata/mutable/common.py 80
-        # used by unit tests
-        self.cache = DictOfSets()
-
-    def _does_overlap(self, x_start, x_length, y_start, y_length):
-        if x_start < y_start:
-            x_start, y_start = y_start, x_start
-            x_length, y_length = y_length, x_length
-        x_end = x_start + x_length
-        y_end = y_start + y_length
-        # this just returns a boolean. Eventually we'll want a form that
-        # returns a range.
-        if not x_length:
-            return False
-        if not y_length:
-            return False
-        if x_start >= y_end:
-            return False
-        if y_start >= x_end:
-            return False
-        return True
-
+        # also used by unit tests
+        self.cache = {}
hunk ./src/allmydata/mutable/common.py 83
-    def _inside(self, x_start, x_length, y_start, y_length):
-        x_end = x_start + x_length
-        y_end = y_start + y_length
-        if x_start < y_start:
-            return False
-        if x_start >= y_end:
-            return False
-        if x_end < y_start:
-            return False
-        if x_end > y_end:
-            return False
-        return True
+    def add(self, verinfo, shnum, offset, data):
+        seqnum = verinfo[0]
+        if seqnum > self.seqnum:
+            self._clear()
+            self.seqnum = seqnum
hunk ./src/allmydata/mutable/common.py 89
-    def add(self, verinfo, shnum, offset, data, timestamp):
         index = (verinfo, shnum)
hunk ./src/allmydata/mutable/common.py 90
-        self.cache.add(index, (offset, data, timestamp) )
+        if index in self.cache:
+            self.cache[index].add(offset, data)
+        else:
+            spans = DataSpans()
+            spans.add(offset, data)
+            self.cache[index] = spans

     def read(self, verinfo, shnum, offset, length):
         """Try to satisfy a read request from cache.
hunk ./src/allmydata/mutable/common.py 99
-        Returns (data, timestamp), or (None, None) if the cache did not hold
-        the requested data.
+        Returns data, or None if the cache did not hold the entire requested span.
         """
hunk ./src/allmydata/mutable/common.py 102
-        # TODO: join multiple fragments, instead of only returning a hit if
-        # we have a fragment that contains the whole request
+        # TODO: perhaps return a DataSpans object representing the fragments
+        # that we have, instead of only returning a hit if we can satisfy the
+        # whole request from cache.
         index = (verinfo, shnum)
hunk ./src/allmydata/mutable/common.py 107
-        for entry in self.cache.get(index, set()):
-            (e_start, e_data, e_timestamp) = entry
-            if self._inside(offset, length, e_start, len(e_data)):
-                want_start = offset - e_start
-                want_end = offset+length - e_start
-                return (e_data[want_start:want_end], e_timestamp)
-        return None, None
-
-
+        if index in self.cache:
+            return self.cache[index].get(offset, length)
+        else:
+            return None
hunk ./src/allmydata/mutable/filenode.py 152
         self._privkey = privkey
     def _populate_encprivkey(self, encprivkey):
         self._encprivkey = encprivkey
-    def _add_to_cache(self, verinfo, shnum, offset, data, timestamp):
-        self._cache.add(verinfo, shnum, offset, data, timestamp)
+    def _add_to_cache(self, verinfo, shnum, offset, data):
+        self._cache.add(verinfo, shnum, offset, data)
     def _read_from_cache(self, verinfo, shnum, offset, length):
         return self._cache.read(verinfo, shnum, offset, length)
hunk ./src/allmydata/mutable/publish.py 10
 from twisted.python import failure
 from allmydata.interfaces import IPublishStatus
 from allmydata.util import base32, hashutil, mathutil, idlib, log
+from allmydata.util.dictutil import DictOfSets
 from allmydata import hashtree, codec
 from allmydata.storage.server import si_b2a
 from pycryptopp.cipher.aes import AES
hunk ./src/allmydata/mutable/publish.py 16
 from foolscap.api import eventually, fireEventually
-from allmydata.mutable.common import MODE_WRITE, MODE_CHECK, DictOfSets, \
+from allmydata.mutable.common import MODE_WRITE, MODE_CHECK, \
      UncoordinatedWriteError, NotEnoughServersError
 from allmydata.mutable.servermap import ServerMap
 from allmydata.mutable.layout import pack_prefix, pack_share, unpack_header, pack_checkstring, \
hunk ./src/allmydata/mutable/retrieve.py 10
 from foolscap.api import DeadReferenceError, eventually, fireEventually
 from allmydata.interfaces import IRetrieveStatus, NotEnoughSharesError
 from allmydata.util import hashutil, idlib, log
+from allmydata.util.dictutil import DictOfSets
 from allmydata import hashtree, codec
 from allmydata.storage.server import si_b2a
 from pycryptopp.cipher.aes import AES
hunk ./src/allmydata/mutable/retrieve.py 16
 from pycryptopp.publickey import rsa
-from allmydata.mutable.common import DictOfSets, CorruptShareError, UncoordinatedWriteError
+from allmydata.mutable.common import CorruptShareError, UncoordinatedWriteError
 from allmydata.mutable.layout import SIGNED_PREFIX, unpack_share_data

 class RetrieveStatus:
hunk ./src/allmydata/mutable/retrieve.py 202
         got_from_cache = False
         datavs = []
         for (offset, length) in readv:
-            (data, timestamp) = self._node._read_from_cache(self.verinfo, shnum,
-                                                            offset, length)
+            data = self._node._read_from_cache(self.verinfo, shnum, offset, length)
             if data is not None:
                 datavs.append(data)
         if len(datavs) == len(readv):
hunk ./src/allmydata/mutable/servermap.py 9
 from twisted.python import failure
 from foolscap.api import DeadReferenceError, RemoteException, eventually
 from allmydata.util import base32, hashutil, idlib, log
+from allmydata.util.dictutil import DictOfSets
 from allmydata.storage.server import si_b2a
 from allmydata.interfaces import IServermapUpdaterStatus
 from pycryptopp.publickey import rsa
hunk ./src/allmydata/mutable/servermap.py 15
 from allmydata.mutable.common import MODE_CHECK, MODE_ANYTHING, MODE_WRITE, MODE_READ, \
-     DictOfSets, CorruptShareError, NeedMoreDataError
+     CorruptShareError, NeedMoreDataError
 from allmydata.mutable.layout import unpack_prefix_and_signature, unpack_header, unpack_share, \
      SIGNED_PREFIX_LENGTH
hunk ./src/allmydata/mutable/servermap.py 585
                 verinfo = self._got_results_one_share(shnum, data, peerid, lp)
                 last_verinfo = verinfo
                 last_shnum = shnum
-                self._node._add_to_cache(verinfo, shnum, 0, data, now)
+                self._node._add_to_cache(verinfo, shnum, 0, data)
             except CorruptShareError, e:
                 # log it and give the other shares a chance to be processed
                 f = failure.Failure()
hunk ./src/allmydata/test/test_mutable.py 304
             d.addCallback(lambda res: self.failUnlessEqual(res, "contents"))
             d.addCallback(lambda ign: self.failUnless(isinstance(n._cache, ResponseCache)))
-            def _check_cache_size(expected):
-                # The total size of cache entries should not increase on the second download.
+            def _check_cache(expected):
+                # The total size of cache entries should not increase on the second download;
+                # in fact the cache contents should be identical.
                 d2 = n.download_best_version()
hunk ./src/allmydata/test/test_mutable.py 308
-                d2.addCallback(lambda ign: self.failUnlessEqual(len(repr(n._cache.cache)), expected))
+                d2.addCallback(lambda rep: self.failUnlessEqual(repr(n._cache.cache), expected))
                 return d2
hunk ./src/allmydata/test/test_mutable.py 310
-            d.addCallback(lambda ign: _check_cache_size(len(repr(n._cache.cache))))
+            d.addCallback(lambda ign: _check_cache(repr(n._cache.cache)))
             return d
         d.addCallback(_created)
         return d
hunk ./src/allmydata/test/test_mutable.py 314
-    test_response_cache_memory_leak.todo = "This isn't fixed (see #1045)."

     def test_create_with_initial_contents_function(self):
         data = "initial contents"
hunk ./src/allmydata/test/test_mutable.py 1720
 class Utils(unittest.TestCase):
-    def _do_inside(self, c, x_start, x_length, y_start, y_length):
-        # we compare this against sets of integers
-        x = set(range(x_start, x_start+x_length))
-        y = set(range(y_start, y_start+y_length))
-        should_be_inside = x.issubset(y)
-        self.failUnlessEqual(should_be_inside, c._inside(x_start, x_length,
-                                                         y_start, y_length),
-                             str((x_start, x_length, y_start, y_length)))
-
-    def test_cache_inside(self):
-        c = ResponseCache()
-        x_start = 10
-        x_length = 5
-        for y_start in range(8, 17):
-            for y_length in range(8):
-                self._do_inside(c, x_start, x_length, y_start, y_length)
-
-    def _do_overlap(self, c, x_start, x_length, y_start, y_length):
-        # we compare this against sets of integers
-        x = set(range(x_start, x_start+x_length))
-        y = set(range(y_start, y_start+y_length))
-        overlap = bool(x.intersection(y))
-        self.failUnlessEqual(overlap, c._does_overlap(x_start, x_length,
-                                                      y_start, y_length),
-                             str((x_start, x_length, y_start, y_length)))
-
-    def test_cache_overlap(self):
-        c = ResponseCache()
-        x_start = 10
-        x_length = 5
-        for y_start in range(8, 17):
-            for y_length in range(8):
-                self._do_overlap(c, x_start, x_length, y_start, y_length)
-
     def test_cache(self):
         c = ResponseCache()
         # xdata = base62.b2a(os.urandom(100))[:100]
hunk ./src/allmydata/test/test_mutable.py 1725
         xdata = "1Ex4mdMaDyOl9YnGBM3I4xaBF97j8OQAg1K3RBR01F2PwTP4HohB3XpACuku8Xj4aTQjqJIR1f36mEj3BCNjXaJmPBEZnnHL0U9l"
         ydata = "4DCUQXvkEPnnr9Lufikq5t21JsnzZKhzxKBhLhrBB6iIcBOWRuT4UweDhjuKJUre8A4wOObJnl3Kiqmlj4vjSLSqUGAkUD87Y3vs"
-        nope = (None, None)
-        c.add("v1", 1, 0, xdata, "time0")
-        c.add("v1", 1, 2000, ydata, "time1")
-        self.failUnlessEqual(c.read("v2", 1, 10, 11), nope)
-        self.failUnlessEqual(c.read("v1", 2, 10, 11), nope)
-        self.failUnlessEqual(c.read("v1", 1, 0, 10), (xdata[:10], "time0"))
-        self.failUnlessEqual(c.read("v1", 1, 90, 10), (xdata[90:], "time0"))
-        self.failUnlessEqual(c.read("v1", 1, 300, 10), nope)
-        self.failUnlessEqual(c.read("v1", 1, 2050, 5), (ydata[50:55], "time1"))
-        self.failUnlessEqual(c.read("v1", 1, 0, 101), nope)
-        self.failUnlessEqual(c.read("v1", 1, 99, 1), (xdata[99:100], "time0"))
-        self.failUnlessEqual(c.read("v1", 1, 100, 1), nope)
-        self.failUnlessEqual(c.read("v1", 1, 1990, 9), nope)
-        self.failUnlessEqual(c.read("v1", 1, 1990, 10), nope)
-        self.failUnlessEqual(c.read("v1", 1, 1990, 11), nope)
-        self.failUnlessEqual(c.read("v1", 1, 1990, 15), nope)
-        self.failUnlessEqual(c.read("v1", 1, 1990, 19), nope)
-        self.failUnlessEqual(c.read("v1", 1, 1990, 20), nope)
-        self.failUnlessEqual(c.read("v1", 1, 1990, 21), nope)
-        self.failUnlessEqual(c.read("v1", 1, 1990, 25), nope)
-        self.failUnlessEqual(c.read("v1", 1, 1999, 25), nope)
+        c.add("v1", 1, 0, xdata)
+        c.add("v1", 1, 2000, ydata)
+        self.failUnlessEqual(c.read("v2", 1, 10, 11), None)
+        self.failUnlessEqual(c.read("v1", 2, 10, 11), None)
+        self.failUnlessEqual(c.read("v1", 1, 0, 10), xdata[:10])
+        self.failUnlessEqual(c.read("v1", 1, 90, 10), xdata[90:])
+        self.failUnlessEqual(c.read("v1", 1, 300, 10), None)
+        self.failUnlessEqual(c.read("v1", 1, 2050, 5), ydata[50:55])
+        self.failUnlessEqual(c.read("v1", 1, 0, 101), None)
+        self.failUnlessEqual(c.read("v1", 1, 99, 1), xdata[99:100])
+        self.failUnlessEqual(c.read("v1", 1, 100, 1), None)
+        self.failUnlessEqual(c.read("v1", 1, 1990, 9), None)
+        self.failUnlessEqual(c.read("v1", 1, 1990, 10), None)
+        self.failUnlessEqual(c.read("v1", 1, 1990, 11), None)
+        self.failUnlessEqual(c.read("v1", 1, 1990, 15), None)
+        self.failUnlessEqual(c.read("v1", 1, 1990, 19), None)
+        self.failUnlessEqual(c.read("v1", 1, 1990, 20), None)
+        self.failUnlessEqual(c.read("v1", 1, 1990, 21), None)
+        self.failUnlessEqual(c.read("v1", 1, 1990, 25), None)
+        self.failUnlessEqual(c.read("v1", 1, 1999, 25), None)
hunk ./src/allmydata/test/test_mutable.py 1746
-        # optional: join fragments
+        # test joining fragments
         c = ResponseCache()
hunk ./src/allmydata/test/test_mutable.py 1748
-        c.add("v1", 1, 0, xdata[:10], "time0")
-        c.add("v1", 1, 10, xdata[10:20], "time1")
-        #self.failUnlessEqual(c.read("v1", 1, 0, 20), (xdata[:20], "time0"))
+        c.add("v1", 1, 0, xdata[:10])
+        c.add("v1", 1, 10, xdata[10:20])
+        self.failUnlessEqual(c.read("v1", 1, 0, 20), xdata[:20])

 class Exceptions(unittest.TestCase):
     def test_repr(self):
}

Context:

[docs/known_issues.rst: Add section on traffic analysis. Fix URL for current version of file.
david-sarah@jacaranda.org**20101024234259
 Ignore-this: f3416e79d3bb833f5118da23e85723ad
]
[test_mutable.py: add test for ResponseCache memory leak. refs #1045, #1129
david-sarah@jacaranda.org**20101024193409
 Ignore-this: 3aee7f0677956cc6deaccb4d5b8e415f
]
[docs/running.html: fix missing end-quote, and change frontends/ doc references to .rst.
david-sarah@jacaranda.org**20101024171500
 Ignore-this: 47c645a6595e1790b1d1adfa71af0e1d
]
[docs/running.html: 'tahoe create-client' now creates a node with storage disabled. Also change configuration.txt references to configuration.rst.
david-sarah@jacaranda.org**20101024170431
 Ignore-this: e5b048055494ba3505bb8a506610681c
]
[test_encodingutil.py: test_argv_to_unicode modified the wrong encoding variable. fixes #1214
david-sarah@jacaranda.org**20101023035810
 Ignore-this: e5f1f849931b96939facc53d93ff61c5
]
[doc: add explanation of the motivation for the surprising and awkward API to erasure coding
zooko@zooko.com**20101015060202
 Ignore-this: 428913ff6e1bf5b393deffb1f20b949b
]
[setup: catch and log ValueError from locale.getdefaultlocale() in show-tool-versions.py
zooko@zooko.com**20101015054440
 Ignore-this: 827d91490562c32ed7cf6526dfded773
 I got a bug report from Mathias Baert showing that locale.getdefaultlocale()
 raises an exception on his Mac OS X system. Heh.
]
[docs: update how-to-make-a-release doc with a few tweaks from the 1.8.0 process
zooko@zooko.com**20101015054413
 Ignore-this: ca5e9478531a3393792ae283239549dd
]
[docs: update NEWS ref: #1216
zooko@zooko.com**20101015053719
 Ignore-this: 2e0b92e4145d667cdf075e64b7965530
]
[docs: fix tab-vs-spaces, make some CLI examples /"literal", wrap some to
Brian Warner **20101015060606
 Ignore-this: eae08bdf0afb19a2fbf41c31e70a8122
 80-cols, remove spurious whitespace. Add rst2html.py rule to Makefile.
]
[docs: add Peter Secor, Shawn Willden, and Terrell Russell as signatories to docs/backdoors.rst
zooko@zooko.com**20101015053242
 Ignore-this: c77adf819d664f673e17c4aaeb353f33
]
[docs: convert all .txt docs to .rst thanks to Ravi Pinjala
zooko@zooko.com**20101015052913
 Ignore-this: 178a5122423189ecfc45b142314a78ec
 fixes #1225
]
[docs: add statement on our refusal to insert backdoors
zooko@zooko.com**20101006051147
 Ignore-this: 644d308319a7b80c4434bdff9760404a
]
[setup: add --multi-version to the "setup.py develop" command-line
zooko@zooko.com**20101005182350
 Ignore-this: 709155cc21caff29826b8d41a8c8d63d
 fixes #530. I earlier tried this twice (see #530 for history) and then twice
 rolled it back due to some problems that arose.
 However, I didn't write down what the problems were in enough detail on the
 ticket that I can tell today whether those problems are still issues, so here
 goes the third attempt. (I did write down on the ticket that it would not
 create site.py or .pth files in the target directory with --multi-version
 mode, but I didn't explain why *that* was a problem.)
]
[setup: use execfile to access _auto_deps.py in its proper location of src/allmydata/ instead of copying it into place when setup.py is executed
zooko@zooko.com**20100906055714
 Ignore-this: c179b42672d775580afad40121f86812
]
[trivial: M-x whitespace-cleanup
zooko@zooko.com**20100903144712
 Ignore-this: 1bb764d11ac69b4a35ea091cfb13158a
]
[minor: remove unused interface declaration, change allmydata.org to tahoe-lafs.org in email address, fix wording in relnotes.txt
zooko@zooko.com**20100930153708
 Ignore-this: a452969228afed2774de375e29fa3048
]
[immutable/repairer.py: don't use the default happiness setting when repairing
Kevan Carstensen **20100927200102
 Ignore-this: bd704d9744b970849da8d46a16b8089a
]
[NEWS: note dependency updates to pycryptopp and pycrypto.
david-sarah@jacaranda.org**20100924191207
 Ignore-this: eeaf5c9c9104f24c450c2ec4482ac1ee
]
[TAG allmydata-tahoe-1.8.0
zooko@zooko.com**20100924021631
 Ignore-this: 494ca0a885c5e20c883845fc53e7ab5d
]

Patch bundle hash: 62cba8391bf4fcbaf0f842a45be08e30f83ac5f4
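
For reviewers: below is a minimal, hypothetical usage sketch (not part of the bundle
above) of the revised ResponseCache, assuming the add()/read() signatures and the
flush-on-higher-seqnum behaviour introduced by the hunks above. The verinfo tuples
are illustrative stand-ins; only their first element (the sequence number) is
consulted by the cache, and real verinfo tuples carry more fields.

from allmydata.mutable.common import ResponseCache

c = ResponseCache()
v1 = (1, "fake-root-hash-1")   # stand-in verinfo with seqnum 1 (hypothetical values)
v2 = (2, "fake-root-hash-2")   # stand-in verinfo with a higher seqnum

c.add(v1, 1, 0, "0123456789")                 # cache bytes 0-9 of share 1
c.add(v1, 1, 10, "abcdefghij")                # adjacent range; the DataSpans entry merges it
assert c.read(v1, 1, 5, 10) == "56789abcde"   # a read can cross the merged fragments
assert c.read(v1, 1, 15, 10) is None          # tail not cached, so the whole read misses
c.add(v2, 1, 0, "newer version data")         # higher seqnum seen: older entries are flushed
assert c.read(v1, 1, 0, 10) is None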