1 patch for repository /home/zooko/playground/tahoe-lafs/999/dsv16:

Wed Sep 28 23:50:38 MDT 2011  zooko@zooko.com
  * split Immutable S3 Share into for-reading and for-writing classes, remove unused (as far as I can tell) methods, use cStringIO for buffering the writes
  
  TODO: define the interfaces that the new classes claim to implement

New patches:

[split Immutable S3 Share into for-reading and for-writing classes, remove unused (as far as I can tell) methods, use cStringIO for buffering the writes
zooko@zooko.com**20110929055038
 Ignore-this: 82d8c4488a8548936285a975ef5a1559
 
 TODO: define the interfaces that the new classes claim to implement
] {
hunk ./src/allmydata/interfaces.py 503
     def get_used_space():
         """
-        Returns the amount of backend storage including overhead, in bytes, used
-        by this share.
+        Returns the amount of backend storage including overhead (which may
+        have to be estimated), in bytes, used by this share.
         """

     def unlink():
hunk ./src/allmydata/storage/backends/s3/immutable.py 3
 import struct
+from cStringIO import StringIO

 from twisted.internet import defer
hunk ./src/allmydata/storage/backends/s3/immutable.py 27
 # data_length+0x0c: first lease. Each lease record is 72 bytes.

-class ImmutableS3Share(object):
-    implements(IStoredShare)
+class ImmutableS3ShareBase(object):
+    implements(IShareBase) # XXX

     sharetype = "immutable"
     LEASE_SIZE = struct.calcsize(">L32s32sL") # for compatibility
hunk ./src/allmydata/storage/backends/s3/immutable.py 35
     HEADER = ">LLL"
     HEADER_SIZE = struct.calcsize(HEADER)

-    def __init__(self, s3bucket, storageindex, shnum, max_size=None, data=None):
-        """
-        If max_size is not None then I won't allow more than max_size to be written to me.
-
-        Clients should use the load_immutable_s3_share and create_immutable_s3_share
-        factory functions rather than creating instances directly.
-        """
+    def __init__(self, s3bucket, storageindex, shnum):
         self._s3bucket = s3bucket
         self._storageindex = storageindex
         self._shnum = shnum
hunk ./src/allmydata/storage/backends/s3/immutable.py 39
-        self._max_size = max_size
-        self._data = data
         self._key = get_s3_share_key(storageindex, shnum)
hunk ./src/allmydata/storage/backends/s3/immutable.py 40
-        self._data_offset = self.HEADER_SIZE
-        self._loaded = False

     def __repr__(self):
hunk ./src/allmydata/storage/backends/s3/immutable.py 42
-        return ("<ImmutableS3Share at %r>" % (self._key,))
-
-    def load(self):
-        if self._max_size is not None: # creating share
-            # The second field, which was the four-byte share data length in
-            # Tahoe-LAFS versions prior to 1.3.0, is not used; we always write 0.
-            # We also write 0 for the number of leases.
-            self._home.setContent(struct.pack(self.HEADER, 1, 0, 0) )
-            self._end_offset = self.HEADER_SIZE + self._max_size
-            self._size = self.HEADER_SIZE
-            self._writes = []
-            self._loaded = True
-            return defer.succeed(None)
-
-        if self._data is None:
-            # If we don't already have the data, get it from S3.
-            d = self._s3bucket.get_object(self._key)
-        else:
-            d = defer.succeed(self._data)
-
-        def _got_data(data):
-            self._data = data
-            header = self._data[:self.HEADER_SIZE]
-            (version, unused, num_leases) = struct.unpack(self.HEADER, header)
-
-            if version != 1:
-                msg = "%r had version %d but we wanted 1" % (self, version)
-                raise UnknownImmutableContainerVersionError(msg)
-
-            # We cannot write leases in share files, but allow them to be present
-            # in case a share file is copied from a disk backend, or in case we
-            # need them in future.
-            self._size = len(self._data)
-            self._end_offset = self._size - (num_leases * self.LEASE_SIZE)
-            self._loaded = True
-        d.addCallback(_got_data)
-        return d
-
-    def close(self):
-        # This will briefly use memory equal to double the share size.
-        # We really want to stream writes to S3, but I don't think txaws supports that yet
-        # (and neither does IS3Bucket, since that's a thin wrapper over the txaws S3 API).
-
-        self._data = "".join(self._writes)
-        del self._writes
-        self._s3bucket.put_object(self._key, self._data)
-        return defer.succeed(None)
-
-    def get_used_space(self):
-        return self._size
+        return ("<%s at %r>" % (self.__class__.__name__, self._key,))

     def get_storage_index(self):
         return self._storageindex
hunk ./src/allmydata/storage/backends/s3/immutable.py 53
     def get_shnum(self):
         return self._shnum

-    def unlink(self):
-        self._data = None
-        self._writes = None
-        return self._s3bucket.delete_object(self._key)
+class ImmutableS3ShareForWriting(ImmutableS3ShareBase):
+    implements(IShareForWriting) # XXX
+
+    def __init__(self, s3bucket, storageindex, shnum, max_size):
+        """
+        I won't allow more than max_size to be written to me.
+        """
+        precondition(isinstance(max_size, (int, long)), max_size)
+        ImmutableS3ShareBase.__init__(self, s3bucket, storageindex, shnum)
+        self._max_size = max_size
+        self._end_offset = self.HEADER_SIZE + self._max_size
+
+        self._buf = StringIO()
+        # The second field, which was the four-byte share data length in
+        # Tahoe-LAFS versions prior to 1.3.0, is not used; we always write 0.
+        # We also write 0 for the number of leases.
+        self._buf.write(struct.pack(self.HEADER, 1, 0, 0) )
+
+    def close(self):
+        # We really want to stream writes to S3, but txaws doesn't support
+        # that yet (and neither does IS3Bucket, since that's a thin wrapper
+        # over the txaws S3 API). See
+        # https://bugs.launchpad.net/txaws/+bug/767205 and
+        # https://bugs.launchpad.net/txaws/+bug/783801
+        return self._s3bucket.put_object(self._key, self._buf.getvalue())

     def get_allocated_size(self):
         return self._max_size
hunk ./src/allmydata/storage/backends/s3/immutable.py 82
-    def get_size(self):
-        return self._size
+    def write_share_data(self, offset, data):
+        self._buf.seek(offset)
+        self._buf.write(data)
+        if self._buf.tell() > self._max_size:
+            raise DataTooLargeError(self._max_size, offset, len(data))
+        return defer.succeed(None)
+
+class ImmutableS3ShareForReading(ImmutableS3ShareBase):
+    implements(IStoredShareForReading) # XXX
+
+    def __init__(self, s3bucket, storageindex, shnum, data):
+        ImmutableS3ShareBase.__init__(self, s3bucket, storageindex, shnum)
+        self._data = data
+
+        header = self._data[:self.HEADER_SIZE]
+        (version, unused, num_leases) = struct.unpack(self.HEADER, header)
hunk ./src/allmydata/storage/backends/s3/immutable.py 99
-    def get_data_length(self):
-        return self._end_offset - self._data_offset
+        if version != 1:
+            msg = "%r had version %d but we wanted 1" % (self, version)
+            raise UnknownImmutableContainerVersionError(msg)
+
+        # We cannot write leases in share files, but allow them to be present
+        # in case a share file is copied from a disk backend, or in case we
+        # need them in future.
+        self._end_offset = len(self._data) - (num_leases * self.LEASE_SIZE)

     def readv(self, readv):
         datav = []
hunk ./src/allmydata/storage/backends/s3/immutable.py 119
         # Reads beyond the end of the data are truncated. Reads that start
         # beyond the end of the data return an empty string.
-        seekpos = self._data_offset+offset
+        seekpos = self.HEADER_SIZE+offset
         actuallength = max(0, min(length, self._end_offset-seekpos))
         if actuallength == 0:
             return defer.succeed("")
hunk ./src/allmydata/storage/backends/s3/immutable.py 124
         return defer.succeed(self._data[offset:offset+actuallength])
-
-    def write_share_data(self, offset, data):
-        length = len(data)
-        precondition(offset >= self._size, "offset = %r, size = %r" % (offset, self._size))
-        if self._max_size is not None and offset+length > self._max_size:
-            raise DataTooLargeError(self._max_size, offset, length)
-
-        if offset > self._size:
-            self._writes.append("\x00" * (offset - self._size))
-        self._writes.append(data)
-        self._size = offset + len(data)
-        return defer.succeed(None)
-
-    def add_lease(self, lease_info):
-        pass
-
-
-def load_immutable_s3_share(s3bucket, storageindex, shnum, data=None):
-    return ImmutableS3Share(s3bucket, storageindex, shnum, data=data).load()
-
-def create_immutable_s3_share(s3bucket, storageindex, shnum, max_size):
-    return ImmutableS3Share(s3bucket, storageindex, shnum, max_size=max_size).load()
hunk ./src/allmydata/storage/backends/s3/s3_backend.py 9
 from allmydata.storage.common import si_a2b
 from allmydata.storage.bucket import BucketWriter
 from allmydata.storage.backends.base import Backend, ShareSet
-from allmydata.storage.backends.s3.immutable import load_immutable_s3_share, create_immutable_s3_share
+from allmydata.storage.backends.s3.immutable import ImmutableS3ShareForReading, ImmutableS3ShareForWriting
 from allmydata.storage.backends.s3.mutable import load_mutable_s3_share, create_mutable_s3_share
 from allmydata.storage.backends.s3.s3_common import get_s3_share_key, NUM_RE
 from allmydata.mutable.layout import MUTABLE_MAGIC
hunk ./src/allmydata/storage/backends/s3/s3_backend.py 107
                 return load_mutable_s3_share(self._s3bucket, self._storageindex, shnum, data=data)
             else:
                 # assume it's immutable
-                return load_immutable_s3_share(self._s3bucket, self._storageindex, shnum, data=data)
+                return ImmutableS3ShareForReading(self._s3bucket, self._storageindex, shnum, data=data)
         d.addCallback(_make_share)
         return d
hunk ./src/allmydata/storage/backends/s3/s3_backend.py 116
         return False

     def make_bucket_writer(self, storageserver, shnum, max_space_per_bucket, lease_info, canary):
-        d = create_immutable_s3_share(self._s3bucket, self.get_storage_index(), shnum,
+        immsh = ImmutableS3ShareForWriting(self._s3bucket, self.get_storage_index(), shnum,
                                       max_size=max_space_per_bucket)
hunk ./src/allmydata/storage/backends/s3/s3_backend.py 118
-        def _created(immsh):
-            return BucketWriter(storageserver, immsh, lease_info, canary)
-        d.addCallback(_created)
-        return d
+        return defer.succeed(BucketWriter(storageserver, immsh, lease_info, canary))

     def _create_mutable_share(self, storageserver, shnum, write_enabler):
         serverid = storageserver.get_serverid()
}

Context:

[Comment out an assertion that was causing all mutable tests to fail. THIS IS PROBABLY WRONG. refs #999
david-sarah@jacaranda.org**20110929041110
 Ignore-this: 1e402d51ec021405b191757a37b35a94
]
[Fix some incorrect or incomplete asyncifications. refs #999
david-sarah@jacaranda.org**20110929040800
 Ignore-this: ed70e9af2190217c84fd2e8c41de4c7e
]
[Add some debugging assertions that share objects are not Deferred. refs #999
david-sarah@jacaranda.org**20110929040657
 Ignore-this: 5c7f56a146f5a3c353c6fe5b090a7dc5
]
[scripts/debug.py: take account of some API changes. refs #999
david-sarah@jacaranda.org**20110929040539
 Ignore-this: 933c3d44b993c041105038c7d4514386
]
[Make get_sharesets_for_prefix synchronous for the time being (returning a Deferred breaks crawlers). refs #999
david-sarah@jacaranda.org**20110929040136
 Ignore-this: e94b93d4f3f6173d9de80c4121b68748
]
[More asyncification of tests. refs #999
david-sarah@jacaranda.org**20110929035644
 Ignore-this: 28b650a9ef593b3fd7524f6cb562ad71
]
[no_network.py: add some assertions that the things we wrap using LocalWrapper are not Deferred (which is not supported and causes hard-to-debug failures). refs #999
david-sarah@jacaranda.org**20110929035537
 Ignore-this: fd103fbbb54fbbc17b9517c78313120e
]
[Add some debugging code (switched off) to no_network.py. When switched on (PRINT_TRACEBACKS = True), this prints the stack trace associated with the caller of a remote method, mitigating the problem that the traceback normally gets lost at that point. TODO: think of a better way to preserve the traceback that can be enabled by default. refs #999
david-sarah@jacaranda.org**20110929035341
 Ignore-this: 2a593ec3ee450719b241ea8d60a0f320
]
[Use factory functions to create share objects rather than their constructors, to allow the factory to return a Deferred. Also change some methods on IShareSet and IStoredShare to return Deferreds. Refactor some constants associated with mutable shares. refs #999
david-sarah@jacaranda.org**20110928052324
 Ignore-this: bce0ac02f475bcf31b0e3b340cd91198
]
[Work in progress for asyncifying the backend interface (necessary to call txaws methods that return Deferreds). This is incomplete so lots of tests fail. refs #999
david-sarah@jacaranda.org**20110927073903
 Ignore-this: ebdc6c06c3baa9460af128ec8f5b418b
]
[mutable/publish.py: don't crash if there are no writers in _report_verinfo. refs #999
david-sarah@jacaranda.org**20110928014126
 Ignore-this: 9999c82bb3057f755a6e86baeafb8a39
]
[scripts/debug.py: fix incorrect arguments to dump_immutable_share. refs #999
david-sarah@jacaranda.org**20110928014049
 Ignore-this: 1078ee3f06a2f36b29e0cf694d2851cd
]
[test_system.py: more debug output for a failing check in test_filesystem. refs #999
david-sarah@jacaranda.org**20110928014019
 Ignore-this: e8bb77b8f7db12db7cd69efb6e0ed130
]
[test_system.py: incorrect arguments were being passed to the constructor for MutableDiskShare. refs #999
david-sarah@jacaranda.org**20110928013857
 Ignore-this: e9719f74e7e073e37537f9a71614b8a0
]
[Undo an incompatible change to RIStorageServer. refs #999
david-sarah@jacaranda.org**20110928013729
 Ignore-this: bea4c0f6cb71202fab942cd846eab693
]
[mutable/publish.py: resolve conflicting patches. refs #999
david-sarah@jacaranda.org**20110927073530
 Ignore-this: 6154a113723dc93148151288bd032439
]
[test_storage.py: fix test_no_st_blocks. refs #999
david-sarah@jacaranda.org**20110927072848
 Ignore-this: 5f12b784920f87d09c97c676d0afa6f8
]
[Cleanups to S3 backend (not including Deferred changes). refs #999
david-sarah@jacaranda.org**20110927071855
 Ignore-this: f0dca788190d92b1edb1ee1498fb34dc
]
[Cleanups to disk backend. refs #999
david-sarah@jacaranda.org**20110927071544
 Ignore-this: e9d3fd0e85aaf301c04342fffdc8f26
]
[test_storage.py: fix test_status_bad_disk_stats. refs #999
david-sarah@jacaranda.org**20110927071403
 Ignore-this: 6108fee69a60962be2df2ad11b483a11
]
[util/deferredutil.py: add some utilities for asynchronous iteration. refs #999
david-sarah@jacaranda.org**20110927070947
 Ignore-this: ac4946c1e5779ea64b85a1a420d34c9e
]
[Add 'has-immutable-readv' to server version information. refs #999
david-sarah@jacaranda.org**20110923220935
 Ignore-this: c3c4358f2ab8ac503f99c968ace8efcf
]
[Minor cleanup to disk backend. refs #999
david-sarah@jacaranda.org**20110923205510
 Ignore-this: 79f92d7c2edb14cfedb167247c3f0d08
]
[Update the S3 backend. refs #999
david-sarah@jacaranda.org**20110923205345
 Ignore-this: 5ca623a17e09ddad4cab2f51b49aec0a
]
[Update the null backend to take into account interface changes. Also, it now records which shares are present, but not their contents. refs #999
david-sarah@jacaranda.org**20110923205219
 Ignore-this: 42a23d7e253255003dc63facea783251
]
[Make EmptyShare.check_testv a simple function. refs #999
david-sarah@jacaranda.org**20110923204945
 Ignore-this: d0132c085f40c39815fa920b77fc39ab
]
[The cancel secret needs to be unique, even if it isn't explicitly provided. refs #999
david-sarah@jacaranda.org**20110923204914
 Ignore-this: 6c44bb908dd4c0cdc59506b2d87a47b0
]
[Implement readv for immutable shares. refs #999
david-sarah@jacaranda.org**20110923204611
 Ignore-this: 24f14b663051169d66293020e40c5a05
]
[Remove redundant si_s argument from check_write_enabler. refs #999
david-sarah@jacaranda.org**20110923204425
 Ignore-this: 25be760118dbce2eb661137f7d46dd20
]
[interfaces.py: add fill_in_space_stats method to IStorageBackend. refs #999
david-sarah@jacaranda.org**20110923203723
 Ignore-this: 59371c150532055939794fed6c77dcb6
]
[Add incomplete S3 backend. refs #999
david-sarah@jacaranda.org**20110923041314
 Ignore-this: b48df65699e3926dcbb87b5f755cdbf1
]
[Move advise_corrupt_share to allmydata/storage/backends/base.py, since it will be common to the disk and S3 backends. refs #999
david-sarah@jacaranda.org**20110923041115
 Ignore-this: 782b49f243bd98fcb6c249f8e40fd9f
]
[A few comment cleanups. refs #999
david-sarah@jacaranda.org**20110923041003
 Ignore-this: f574b4a3954b6946016646011ad15edf
]
[mutable/publish.py: elements should not be removed from a dictionary while it is being iterated over. refs #393
david-sarah@jacaranda.org**20110923040825
 Ignore-this: 135da94bd344db6ccd59a576b54901c1
]
[Blank line cleanups.
david-sarah@jacaranda.org**20110923012044
 Ignore-this: 8e1c4ecb5b0c65673af35872876a8591
]
[Reinstate the cancel_lease methods of ImmutableDiskShare and MutableDiskShare, since they are needed for lease expiry. refs #999
david-sarah@jacaranda.org**20110922183323
 Ignore-this: a11fb0dd0078ff627cb727fc769ec848
]
[Fix most of the crawler tests. refs #999
david-sarah@jacaranda.org**20110922183008
 Ignore-this: 116c0848008f3989ba78d87c07ec783c
]
[Fix some more test failures. refs #999
david-sarah@jacaranda.org**20110922045451
 Ignore-this: b726193cbd03a7c3d343f6e4a0f33ee7
]
[uri.py: resolve a conflict between trunk and the pluggable-backends patches. refs #999
david-sarah@jacaranda.org**20110921222038
 Ignore-this: ffeeab60d8e71a6a29a002d024d76fcf
]
[Fix more shallow bugs, mainly FilePathification. Also, remove the max_space_per_bucket parameter from BucketWriter since it can be obtained from the _max_size attribute of the share (via a new get_allocated_size() accessor). refs #999
david-sarah@jacaranda.org**20110921221421
 Ignore-this: 600e3ccef8533aa43442fa576c7d88cf
]
[More fixes to tests needed for pluggable backends. refs #999
david-sarah@jacaranda.org**20110921184649
 Ignore-this: 9be0d3a98e350fd4e17a07d2c00bb4ca
]
[docs/backends/S3.rst, disk.rst: describe type of space settings as 'quantity of space', not 'str'. refs #999
david-sarah@jacaranda.org**20110921031705
 Ignore-this: a74ed8e01b0a1ab5f07a1487d7bf138
]
[docs/backends/S3.rst: remove Issues section. refs #999
david-sarah@jacaranda.org**20110921031625
 Ignore-this: c83d8f52b790bc32488869e6ee1df8c2
]
[Fix some incorrect attribute accesses. refs #999
david-sarah@jacaranda.org**20110921031207
 Ignore-this: f1ea4c3ea191f6d4b719afaebd2b2bcd
]
[docs/backends: document the configuration options for the pluggable backends scheme. refs #999
david-sarah@jacaranda.org**20110920171737
 Ignore-this: 5947e864682a43cb04e557334cda7c19
]
[Work-in-progress, includes fix to bug involving BucketWriter. refs #999
david-sarah@jacaranda.org**20110920033803
 Ignore-this: 64e9e019421454e4d08141d10b6e4eed
]
[Pluggable backends -- all other changes. refs #999
david-sarah@jacaranda.org**20110919233256
 Ignore-this: 1a77b6b5d178b32a9b914b699ba7e957
]
[Pluggable backends -- new and moved files, changes to moved files. refs #999
david-sarah@jacaranda.org**20110919232926
 Ignore-this: ec5d2d1362a092d919e84327d3092424
]
[interfaces.py: 'which -> that' grammar cleanup.
david-sarah@jacaranda.org**20110825003217
 Ignore-this: a3e15f3676de1b346ad78aabdfb8cac6
]
[test/test_runner.py: BinTahoe.test_path has rare nondeterministic failures; this patch probably fixes a problem where the actual cause of failure is masked by a string conversion error.
david-sarah@jacaranda.org**20110927225336
 Ignore-this: 6f1ad68004194cc9cea55ace3745e4af
]
[docs/configuration.rst: add section about the types of node, and clarify when setting web.port enables web-API service. fixes #1444
zooko@zooko.com**20110926203801
 Ignore-this: ab94d470c68e720101a7ff3c207a719e
]
[TAG allmydata-tahoe-1.9.0a2
warner@lothar.com**20110925234811
 Ignore-this: e9649c58f9c9017a7d55008938dba64f
]

Patch bundle hash:
3819636e5b64e4cb9d41f1961b1036ad97f1d021
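
The essence of the change above is that ImmutableS3ShareForWriting no longer accumulates a list of write fragments to be joined at close; it writes into an in-memory cStringIO buffer and uploads the whole object to S3 with a single put_object call when the share is closed, since S3 has no append operation and txaws does not yet expose streaming uploads. The following self-contained sketch (Python 2) illustrates that buffer-then-single-PUT pattern. It is not part of the patch: FakeS3Bucket is a hypothetical stand-in for the txaws-backed IS3Bucket wrapper, and the sketch assumes offsets passed to write_share_data are relative to the data area that follows the 12-byte container header.

# Sketch only, not part of the patch bundle.
import struct
from cStringIO import StringIO

HEADER = ">LLL"
HEADER_SIZE = struct.calcsize(HEADER)

class FakeS3Bucket(object):
    def __init__(self):
        self.objects = {}

    def put_object(self, key, data):
        # A real IS3Bucket returns a Deferred; the sketch just stores the bytes.
        self.objects[key] = data

class BufferedShareWriter(object):
    def __init__(self, bucket, key, max_size):
        self._bucket = bucket
        self._key = key
        self._max_size = max_size
        self._buf = StringIO()
        # Version 1 container header; the unused length field and the lease
        # count are both written as 0.
        self._buf.write(struct.pack(HEADER, 1, 0, 0))

    def write_share_data(self, offset, data):
        # Writes accumulate in memory; nothing is sent to S3 yet.
        self._buf.seek(HEADER_SIZE + offset)
        self._buf.write(data)
        if self._buf.tell() > HEADER_SIZE + self._max_size:
            raise ValueError("write exceeds max_size of %d" % (self._max_size,))

    def close(self):
        # One PUT of the whole object: the share is uploaded in a single
        # request when it is closed.
        self._bucket.put_object(self._key, self._buf.getvalue())

bucket = FakeS3Bucket()
writer = BufferedShareWriter(bucket, "shares/aa/dummysi/0", max_size=100)
writer.write_share_data(0, "hello ")
writer.write_share_data(6, "world")
writer.close()
assert bucket.objects["shares/aa/dummysi/0"][HEADER_SIZE:] == "hello world"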