2 patches for repository http://tahoe-lafs.org/source/tahoe/trunk: Thu Aug 25 01:16:34 BST 2011 david-sarah@jacaranda.org * Enforce zope interfaces (relative to trunk). refs #1474 Thu Sep 15 03:38:00 BST 2011 david-sarah@jacaranda.org * Rerecording of pluggable backend patches by David-Sarah. refs #999 New patches: [Enforce zope interfaces (relative to trunk). refs #1474 david-sarah@jacaranda.org**20110825001634 Ignore-this: 34ab9f2c433e5b8fee2eeacf42690ca9 ] { hunk ./src/allmydata/check_results.py 2 -from zope.interface import implements +from allmydata.util.interfaceutil import implements from allmydata.interfaces import ICheckResults, ICheckAndRepairResults, \ IDeepCheckResults, IDeepCheckAndRepairResults, IURI from allmydata.util import base32 hunk ./src/allmydata/client.py 5 from allmydata.interfaces import RIStorageServer from allmydata import node -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import reactor, defer from twisted.application import service from twisted.application.internet import TimerService hunk ./src/allmydata/codec.py 3 # -*- test-case-name: allmydata.test.test_encode_share -*- -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer from allmydata.util import mathutil from allmydata.util.assertutil import precondition hunk ./src/allmydata/control.py 3 import os, time -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.application import service from twisted.internet import defer from twisted.internet.interfaces import IConsumer hunk ./src/allmydata/dirnode.py 4 import time, math, unicodedata -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer from foolscap.api import fireEventually import simplejson hunk ./src/allmydata/frontends/auth.py 2 import os -from zope.interface import 
implements +from allmydata.util.interfaceutil import implements from twisted.web.client import getPage from twisted.internet import defer from twisted.cred import error, checkers, credentials hunk ./src/allmydata/frontends/ftpd.py 2 -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.application import service, strports from twisted.internet import defer from twisted.internet.interfaces import IConsumer hunk ./src/allmydata/frontends/sftpd.py 7 from stat import S_IFREG, S_IFDIR from time import time, strftime, localtime -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.python import components from twisted.application import service, strports from twisted.conch.ssh import factory, keys, session hunk ./src/allmydata/immutable/checker.py 1 -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer from foolscap.api import DeadReferenceError, RemoteException from allmydata import hashtree, codec, uri hunk ./src/allmydata/immutable/downloader/node.py 4 import time now = time.time -from zope.interface import Interface +from allmydata.util.interfaceutil import Interface from twisted.python.failure import Failure from twisted.internet import defer from foolscap.api import eventually hunk ./src/allmydata/immutable/downloader/segmentation.py 4 import time now = time.time -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer from twisted.internet.interfaces import IPushProducer from foolscap.api import eventually hunk ./src/allmydata/immutable/downloader/status.py 3 import itertools -from zope.interface import implements +from allmydata.util.interfaceutil import implements from allmydata.interfaces import IDownloadStatus class ReadEvent: hunk ./src/allmydata/immutable/encode.py 4 # -*- test-case-name: allmydata.test.test_encode 
-*- import time -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer from foolscap.api import fireEventually from allmydata import uri hunk ./src/allmydata/immutable/filenode.py 6 import copy import time now = time.time -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer from allmydata import uri hunk ./src/allmydata/immutable/layout.py 2 import struct -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer from allmydata.interfaces import IStorageBucketWriter, IStorageBucketReader, \ FileTooLargeError, HASH_SIZE hunk ./src/allmydata/immutable/literal.py 2 from cStringIO import StringIO -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer from twisted.internet.interfaces import IPushProducer from twisted.protocols import basic hunk ./src/allmydata/immutable/offloaded.py 3 import os, stat, time, weakref -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer from foolscap.api import Referenceable, DeadReferenceError, eventually import allmydata # for __full_version__ hunk ./src/allmydata/immutable/repairer.py 1 -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer from allmydata.storage.server import si_b2a from allmydata.util import log, consumer hunk ./src/allmydata/immutable/upload.py 2 import os, time, weakref, itertools -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.python import failure from twisted.internet import defer from twisted.application import service hunk ./src/allmydata/interfaces.py 2 -from zope.interface import Interface +from allmydata.util.interfaceutil 
import Interface from foolscap.api import StringConstraint, ListOf, TupleOf, SetOf, DictOf, \ ChoiceOf, IntegerConstraint, Any, RemoteInterface, Referenceable hunk ./src/allmydata/introducer/client.py 3 from base64 import b32decode -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.application import service from foolscap.api import Referenceable, SturdyRef, eventually from allmydata.interfaces import InsufficientVersionError hunk ./src/allmydata/introducer/interfaces.py 2 -from zope.interface import Interface +from allmydata.util.interfaceutil import Interface from foolscap.api import StringConstraint, TupleOf, SetOf, DictOf, Any, \ RemoteInterface FURL = StringConstraint(1000) hunk ./src/allmydata/introducer/server.py 4 import time, os.path from base64 import b32decode -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.application import service from foolscap.api import Referenceable, SturdyRef import allmydata hunk ./src/allmydata/key_generator.py 6 import time from foolscap.api import Referenceable, Tub -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import reactor from twisted.application import service from allmydata.util import log hunk ./src/allmydata/manhole.py 13 from twisted.conch.insults import insults from twisted.internet import protocol -from zope.interface import implements +from allmydata.util.interfaceutil import implements # makeTelnetProtocol and _TelnetRealm are for the TelnetManhole hunk ./src/allmydata/monitor.py 2 -from zope.interface import Interface, implements +from allmydata.util.interfaceutil import Interface, implements from allmydata.util import observer class IMonitor(Interface): hunk ./src/allmydata/mutable/filenode.py 4 import random -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer, 
reactor from foolscap.api import eventually from allmydata.interfaces import IMutableFileNode, ICheckable, ICheckResults, \ hunk ./src/allmydata/mutable/layout.py 9 from allmydata.util import mathutil from twisted.python import failure from twisted.internet import defer -from zope.interface import implements +from allmydata.util.interfaceutil import implements # These strings describe the format of the packed structs they help process hunk ./src/allmydata/mutable/publish.py 6 import os, time from StringIO import StringIO from itertools import count -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer from twisted.python import failure from allmydata.interfaces import IPublishStatus, SDMF_VERSION, MDMF_VERSION, \ hunk ./src/allmydata/mutable/repairer.py 2 -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer from allmydata.interfaces import IRepairResults, ICheckResults from allmydata.mutable.publish import MutableData hunk ./src/allmydata/mutable/retrieve.py 4 import time from itertools import count -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer from twisted.python import failure from twisted.internet.interfaces import IPushProducer, IConsumer hunk ./src/allmydata/mutable/servermap.py 3 import sys, time -from zope.interface import implements +from allmydata.util.interfaceutil import implements from itertools import count from twisted.internet import defer from twisted.python import failure hunk ./src/allmydata/nodemaker.py 2 import weakref -from zope.interface import implements +from allmydata.util.interfaceutil import implements from allmydata.util.assertutil import precondition from allmydata.interfaces import INodeMaker, SDMF_VERSION from allmydata.immutable.literal import LiteralFileNode hunk ./src/allmydata/stats.py 11 from 
twisted.internet import reactor from twisted.application import service from twisted.application.internet import TimerService -from zope.interface import implements +from allmydata.util.interfaceutil import implements from foolscap.api import eventually, DeadReferenceError, Referenceable, Tub from allmydata.util import log hunk ./src/allmydata/storage/immutable.py 5 from foolscap.api import Referenceable -from zope.interface import implements +from allmydata.util.interfaceutil import implements from allmydata.interfaces import RIBucketWriter, RIBucketReader from allmydata.util import base32, fileutil, log from allmydata.util.assertutil import precondition hunk ./src/allmydata/storage/server.py 6 from foolscap.api import Referenceable from twisted.application import service -from zope.interface import implements +from allmydata.util.interfaceutil import implements from allmydata.interfaces import RIStorageServer, IStatsProducer from allmydata.util import fileutil, idlib, log, time_format import allmydata # for __full_version__ hunk ./src/allmydata/storage_client.py 33 import time -from zope.interface import implements, Interface +from allmydata.util.interfaceutil import implements, Interface from foolscap.api import eventually from allmydata.interfaces import IStorageBroker from allmydata.util import idlib, log hunk ./src/allmydata/test/bench_dirnode.py 5 from pyutil import benchutil, randutil # http://tahoe-lafs.org/trac/pyutil -from zope.interface import implements +from allmydata.util.interfaceutil import implements from allmydata import dirnode, uri from allmydata.interfaces import IFileNode from allmydata.mutable.filenode import MutableFileNode hunk ./src/allmydata/test/common.py 2 import os, random, struct -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer from twisted.internet.interfaces import IPullProducer from twisted.python import failure hunk ./src/allmydata/test/no_network.py 17 
# or the control.furl . import os.path -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.application import service from twisted.internet import defer, reactor from twisted.python.failure import Failure hunk ./src/allmydata/test/test_dirnode.py 3 import time import unicodedata -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.trial import unittest from twisted.internet import defer from twisted.internet.interfaces import IConsumer hunk ./src/allmydata/test/test_encode.py 1 -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.trial import unittest from twisted.internet import defer from twisted.python.failure import Failure merger 0.0 ( hunk ./src/allmydata/test/test_mutable.py 6 -from twisted.internet.interfaces import IConsumer -from zope.interface import implements hunk ./src/allmydata/test/test_mutable.py 7 -from zope.interface import implements +from allmydata.util.interfaceutil import implements ) hunk ./src/allmydata/test/test_provisioning.py 11 pass # might not be importable, since it needs NumPy from nevow import inevow -from zope.interface import implements +from allmydata.util.interfaceutil import implements class MyRequest: implements(inevow.IRequest) hunk ./src/allmydata/unknown.py 2 -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer from allmydata.interfaces import IFilesystemNode, MustNotBeUnknownRWError, \ MustBeDeepImmutableError hunk ./src/allmydata/uri.py 3 import re, urllib -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.python.components import registerAdapter from allmydata.storage.server import si_a2b, si_b2a from allmydata.util import base32, hashutil hunk ./src/allmydata/util/consumer.py 6 a filenode's read() method. 
See download_to_data() for an example of its use. """ -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet.interfaces import IConsumer class MemoryConsumer: addfile ./src/allmydata/util/interfaceutil.py hunk ./src/allmydata/util/interfaceutil.py 1 + +import sys +from zope.interface import * +from zope.interface.verify import verifyClass +from zope.interface.advice import addClassAdvisor + + +def implements(*interfaces): + frame = sys._getframe(1) + f_locals = frame.f_locals + + # Try to make sure we were called from a class def. Assumes Python > 2.2. + if f_locals is frame.f_globals or '__module__' not in f_locals: + raise TypeError("implements can be used only from a class definition.") + + if '__implements_advice_data__' in f_locals: + raise TypeError("implements can be used only once in a class definition.") + + def _implements_advice(cls): + interfaces, classImplements = cls.__dict__['__implements_advice_data__'] + del cls.__implements_advice_data__ + classImplements(cls, *interfaces) + + if not cls.__name__.startswith('_'): + for interface in interfaces: + try: + verifyClass(interface, cls) + except Exception, e: + print >>sys.stderr, "%s does not implement %s\n%s" % (cls, interface, e) + return cls + + f_locals['__implements_advice_data__'] = interfaces, classImplements + addClassAdvisor(_implements_advice, depth=2) hunk ./src/allmydata/web/common.py 5 import simplejson from twisted.web import http, server from twisted.python import log -from zope.interface import Interface +from allmydata.util.interfaceutil import Interface from nevow import loaders, appserver from nevow.inevow import IRequest from nevow.util import resource_filename hunk ./src/allmydata/web/directory.py 5 import simplejson import urllib -from zope.interface import implements +from allmydata.util.interfaceutil import implements from twisted.internet import defer from twisted.internet.interfaces import IPushProducer from 
twisted.python.failure import Failure hunk ./src/allmydata/web/operations.py 3 import time -from zope.interface import implements +from allmydata.util.interfaceutil import implements from nevow import rend, url, tags as T from nevow.inevow import IRequest from twisted.python.failure import Failure } [Rerecording of pluggable backend patches by David-Sarah. refs #999 david-sarah@jacaranda.org**20110915023800 Ignore-this: 7b0121a99a0ac9fc862960138be37e0 ] { adddir ./src/allmydata/storage/backends adddir ./src/allmydata/storage/backends/disk move ./src/allmydata/storage/immutable.py ./src/allmydata/storage/backends/disk/immutable.py move ./src/allmydata/storage/mutable.py ./src/allmydata/storage/backends/disk/mutable.py adddir ./src/allmydata/storage/backends/null hunk ./docs/garbage-collection.rst 177 use this parameter to implement it. This key is only valid when age-based expiration is in use (i.e. when - ``expire.mode = age`` is used). It will be rejected if cutoff-date + ``expire.mode = age`` is used). It will be ignored if cutoff-date expiration is in use. ``expire.cutoff_date = (date string, required if mode=cutoff-date)`` hunk ./docs/garbage-collection.rst 196 the last renewal time and the cutoff date. This key is only valid when cutoff-based expiration is in use (i.e. when - "expire.mode = cutoff-date"). It will be rejected if age-based expiration + "expire.mode = cutoff-date"). It will be ignored if age-based expiration is in use. 
expire.immutable = (boolean, optional) hunk ./src/allmydata/client.py 245 sharetypes.append("immutable") if self.get_config("storage", "expire.mutable", True, boolean=True): sharetypes.append("mutable") - expiration_sharetypes = tuple(sharetypes) hunk ./src/allmydata/client.py 246 + expiration_policy = { + 'enabled': expire, + 'mode': mode, + 'override_lease_duration': o_l_d, + 'cutoff_date': cutoff_date, + 'sharetypes': tuple(sharetypes), + } ss = StorageServer(storedir, self.nodeid, reserved_space=reserved, discard_storage=discard, hunk ./src/allmydata/client.py 258 readonly_storage=readonly, stats_provider=self.stats_provider, - expiration_enabled=expire, - expiration_mode=mode, - expiration_override_lease_duration=o_l_d, - expiration_cutoff_date=cutoff_date, - expiration_sharetypes=expiration_sharetypes) + expiration_policy=expiration_policy) self.add_service(ss) d = self.when_tub_ready() hunk ./src/allmydata/interfaces.py 29 Number = IntegerConstraint(8) # 2**(8*8) == 16EiB ~= 18e18 ~= 18 exabytes Offset = Number ReadSize = int # the 'int' constraint is 2**31 == 2Gib -- large files are processed in not-so-large increments -WriteEnablerSecret = Hash # used to protect mutable bucket modifications -LeaseRenewSecret = Hash # used to protect bucket lease renewal requests -LeaseCancelSecret = Hash # used to protect bucket lease cancellation requests +WriteEnablerSecret = Hash # used to protect mutable share modifications +LeaseRenewSecret = Hash # used to protect lease renewal requests +LeaseCancelSecret = Hash # used to protect lease cancellation requests class RIStubClient(RemoteInterface): """Each client publishes a service announcement for a dummy object called hunk ./src/allmydata/interfaces.py 106 sharenums=SetOf(int, maxLength=MAX_BUCKETS), allocated_size=Offset, canary=Referenceable): """ - @param storage_index: the index of the bucket to be created or + @param storage_index: the index of the shareset to be created or increfed. 
@param sharenums: these are the share numbers (probably between 0 and 99) that the sender is proposing to store on this hunk ./src/allmydata/interfaces.py 111 server. - @param renew_secret: This is the secret used to protect bucket refresh + @param renew_secret: This is the secret used to protect lease renewal. This secret is generated by the client and stored for later comparison by the server. Each server is given a different secret. hunk ./src/allmydata/interfaces.py 115 - @param cancel_secret: Like renew_secret, but protects bucket decref. - @param canary: If the canary is lost before close(), the bucket is + @param cancel_secret: ignored + @param canary: If the canary is lost before close(), the allocation is deleted. @return: tuple of (alreadygot, allocated), where alreadygot is what we already have and allocated is what we hereby agree to accept. hunk ./src/allmydata/interfaces.py 129 renew_secret=LeaseRenewSecret, cancel_secret=LeaseCancelSecret): """ - Add a new lease on the given bucket. If the renew_secret matches an + Add a new lease on the given shareset. If the renew_secret matches an existing lease, that lease will be renewed instead. If there is no hunk ./src/allmydata/interfaces.py 131 - bucket for the given storage_index, return silently. (note that in + shareset for the given storage_index, return silently. (Note that in tahoe-1.3.0 and earlier, IndexError was raised if there was no hunk ./src/allmydata/interfaces.py 133 - bucket) + shareset.) """ return Any() # returns None now, but future versions might change hunk ./src/allmydata/interfaces.py 139 def renew_lease(storage_index=StorageIndex, renew_secret=LeaseRenewSecret): """ - Renew the lease on a given bucket, resetting the timer to 31 days. - Some networks will use this, some will not. If there is no bucket for + Renew the lease on a given shareset, resetting the timer to 31 days. + Some networks will use this, some will not. 
If there is no shareset for the given storage_index, IndexError will be raised. For mutable shares, if the given renew_secret does not match an hunk ./src/allmydata/interfaces.py 146 existing lease, IndexError will be raised with a note listing the server-nodeids on the existing leases, so leases on migrated shares - can be renewed or cancelled. For immutable shares, IndexError - (without the note) will be raised. + can be renewed. For immutable shares, IndexError (without the note) + will be raised. """ return Any() hunk ./src/allmydata/interfaces.py 154 def get_buckets(storage_index=StorageIndex): return DictOf(int, RIBucketReader, maxKeys=MAX_BUCKETS) - - def slot_readv(storage_index=StorageIndex, shares=ListOf(int), readv=ReadVector): """Read a vector from the numbered shares associated with the given hunk ./src/allmydata/interfaces.py 163 def slot_testv_and_readv_and_writev(storage_index=StorageIndex, secrets=TupleOf(WriteEnablerSecret, - LeaseRenewSecret, - LeaseCancelSecret), + LeaseRenewSecret), tw_vectors=TestAndWriteVectorsForShares, r_vector=ReadVector, ): hunk ./src/allmydata/interfaces.py 167 - """General-purpose test-and-set operation for mutable slots. Perform - a bunch of comparisons against the existing shares. If they all pass, - then apply a bunch of write vectors to those shares. Then use the - read vectors to extract data from all the shares and return the data. + """ + General-purpose atomic test-read-and-set operation for mutable slots. + Perform a bunch of comparisons against the existing shares. If they + all pass: use the read vectors to extract data from all the shares, + then apply a bunch of write vectors to those shares. Return the read + data, which does not include any modifications made by the writes. This method is, um, large. The goal is to allow clients to update all the shares associated with a mutable file in a single round trip. 
hunk ./src/allmydata/interfaces.py 177 - @param storage_index: the index of the bucket to be created or + @param storage_index: the index of the shareset to be created or increfed. @param write_enabler: a secret that is stored along with the slot. Writes are accepted from any caller who can hunk ./src/allmydata/interfaces.py 183 present the matching secret. A different secret should be used for each slot*server pair. - @param renew_secret: This is the secret used to protect bucket refresh + @param renew_secret: This is the secret used to protect lease renewal. This secret is generated by the client and stored for later comparison by the server. Each server is given a different secret. hunk ./src/allmydata/interfaces.py 187 - @param cancel_secret: Like renew_secret, but protects bucket decref. + @param cancel_secret: ignored hunk ./src/allmydata/interfaces.py 189 - The 'secrets' argument is a tuple of (write_enabler, renew_secret, - cancel_secret). The first is required to perform any write. The - latter two are used when allocating new shares. To simply acquire a - new lease on existing shares, use an empty testv and an empty writev. + The 'secrets' argument is a tuple with (write_enabler, renew_secret). + The write_enabler is required to perform any write. The renew_secret + is used when allocating new shares. Each share can have a separate test vector (i.e. a list of comparisons to perform). If all vectors for all shares pass, then all hunk ./src/allmydata/interfaces.py 280 store that on disk. """ + +class IStorageBackend(Interface): + """ + Objects of this kind live on the server side and are used by the + storage server object. + """ + def get_available_space(reserved_space): + """ + Returns available space for share storage in bytes, or + None if this information is not available or if the available + space is unlimited. + + If the backend is configured for read-only mode then this will + return 0. 
+ + reserved_space is how many bytes to subtract from the answer, so + you can pass how many bytes you would like to leave unused on this + filesystem as reserved_space. + """ + + def get_sharesets_for_prefix(prefix): + """ + Generates IShareSet objects for all storage indices matching the + given prefix for which this backend holds shares. + """ + + def get_shareset(storageindex): + """ + Get an IShareSet object for the given storage index. + """ + + def advise_corrupt_share(storageindex, sharetype, shnum, reason): + """ + Clients who discover hash failures in shares that they have + downloaded from me will use this method to inform me about the + failures. I will record their concern so that my operator can + manually inspect the shares in question. + + 'sharetype' is either 'mutable' or 'immutable'. 'shnum' is the integer + share number. 'reason' is a human-readable explanation of the problem, + probably including some expected hash values and the computed ones + that did not match. Corruption advisories for mutable shares should + include a hash of the public key (the same value that appears in the + mutable-file verify-cap), since the current share format does not + store that on disk. + + @param sharetype=str + @param shnum=int + @param reason=str + """ + + +class IShareSet(Interface): + def get_storage_index(): + """ + Returns the storage index for this shareset. + """ + + def get_storage_index_string(): + """ + Returns the base32-encoded storage index for this shareset. + """ + + def get_overhead(): + """ + Returns the storage overhead, in bytes, of this shareset (exclusive + of the space used by its shares). + """ + + def get_shares(): + """ + Generates the IStoredShare objects held in this shareset. + """ + + def get_incoming_shnums(): + """ + Return a frozenset of the shnums (as ints) of incoming shares. 
+ """ + + def make_bucket_writer(storageserver, shnum, max_space_per_bucket, lease_info, canary): + """ + Create a bucket writer that can be used to write data to a given share. + + @param storageserver=RIStorageServer + @param shnum=int: A share number in this shareset + @param max_space_per_bucket=int: The maximum space allocated for the + share, in bytes + @param lease_info=LeaseInfo: The initial lease information + @param canary=Referenceable: If the canary is lost before close(), the + bucket is deleted. + @return an IStorageBucketWriter for the given share + """ + + def make_bucket_reader(storageserver, share): + """ + Create a bucket reader that can be used to read data from a given share. + + @param storageserver=RIStorageServer + @param share=IStoredShare + @return an IStorageBucketReader for the given share + """ + + def readv(wanted_shnums, read_vector): + """ + Read a vector from the numbered shares in this shareset. An empty + wanted_shnums list means to return data from all known shares. + + @param wanted_shnums=ListOf(int) + @param read_vector=ReadVector + @return DictOf(int, ReadData): shnum -> results, with one key per share + """ + + def testv_and_readv_and_writev(secrets, test_and_write_vectors, read_vector, expiration_time): + """ + General-purpose atomic test-read-and-set operation for mutable slots. + Perform a bunch of comparisons against the existing shares in this + shareset. If they all pass: use the read vectors to extract data from + all the shares, then apply a bunch of write vectors to those shares. + Return the read data, which does not include any modifications made by + the writes. + + See the similar method in RIStorageServer for more detail. 
+ + @param secrets=TupleOf(WriteEnablerSecret, LeaseRenewSecret[, ...]) + @param test_and_write_vectors=TestAndWriteVectorsForShares + @param read_vector=ReadVector + @param expiration_time=int + @return TupleOf(bool, DictOf(int, ReadData)) + """ + + def add_or_renew_lease(self, lease_info): + """ + Add a new lease on the shares in this shareset. If the renew_secret + matches an existing lease, that lease will be renewed instead. If + there are no shares in this shareset, return silently. (Note that + in Tahoe-LAFS v1.3.0 and earlier, IndexError was raised if there were + no shares with this shareset's storage index.) + + @param lease_info=LeaseInfo + """ + + def renew_lease(renew_secret, new_expiration_time): + """ + Renew a lease on the shares in this shareset, resetting the timer + to 31 days. Some grids will use this, some will not. If there are no + shares in this shareset, IndexError will be raised. + + For mutable shares, if the given renew_secret does not match an + existing lease, IndexError will be raised with a note listing the + server-nodeids on the existing leases, so leases on migrated shares + can be renewed. For immutable shares, IndexError (without the note) + will be raised. + + @param renew_secret=LeaseRenewSecret + """ + + +class IStoredShare(Interface): + """ + This object contains as much as all of the share data. It is intended + for lazy evaluation, such that in many use cases substantially less than + all of the share data will be accessed. + """ + def is_complete(): + """ + Returns True if the share has been fully written and closed, False if it + exists but is still open, or None if the share does not exist. + """ + + def close(): + """ + Complete writing to this share. + """ + + def get_size(): + """ + Returns the size of the share in bytes. + """ + + def get_used_space(): + """ + Returns the amount of backend storage including overhead, in bytes, used + by this share. + """ + + def get_shnum(): + """ + Returns the share number. 
+ """ + + def unlink(): + """ + Signal that this share can be removed from the backend storage. This does + not guarantee that the share data will be immediately inaccessible, or + that it will be securely erased. + """ + + def read_share_data(offset, length): + """ + Reads beyond the end of the data are truncated. Reads that start + beyond the end of the data return an empty string. + @param offset=int + @param length=int + @return str + """ + + def write_share_data(offset, data): + """ + @param offset=int + @param data=str + """ + + class IStorageBucketWriter(Interface): """ Objects of this kind live on the client side. hunk ./src/allmydata/interfaces.py 497 """ - def put_block(segmentnum=int, data=ShareData): - """@param data: For most segments, this data will be 'blocksize' + def put_block(segmentnum, data): + """ + @param segmentnum=int + @param data=ShareData: For most segments, this data will be 'blocksize' bytes in length. The last segment might be shorter. @return: a Deferred that fires (with None) when the operation completes """ hunk ./src/allmydata/interfaces.py 530 of plaintext, crypttext, and shares), as well as encoding parameters that are necessary to recover the data. This is a serialized dict mapping strings to other strings. The hash of this data is kept in - the URI and verified before any of the data is used. All buckets for - a given file contain identical copies of this data. + the URI and verified before any of the data is used. All share + containers for a given file contain identical copies of this data. The serialization format is specified with the following pseudocode: for k in sorted(dict.keys()): hunk ./src/allmydata/interfaces.py 1777 Block Hash, and the encoding parameters, both of which must be included in the URI. - I do not choose shareholders, that is left to the IUploader. I must be - given a dict of RemoteReferences to storage buckets that are ready and - willing to receive data. 
+ I do not choose shareholders, that is left to the IUploader. """ def set_size(size): hunk ./src/allmydata/interfaces.py 1784 """Specify the number of bytes that will be encoded. This must be peformed before get_serialized_params() can be called. """ + def set_params(params): """Override the default encoding parameters. 'params' is a tuple of (k,d,n), where 'k' is the number of required shares, 'd' is the hunk ./src/allmydata/interfaces.py 1880 download, validate, decode, and decrypt data from them, writing the results to an output file. - I do not locate the shareholders, that is left to the IDownloader. I must - be given a dict of RemoteReferences to storage buckets that are ready to - send data. + I do not locate the shareholders, that is left to the IDownloader. """ def setup(outfile): addfile ./src/allmydata/storage/backends/__init__.py addfile ./src/allmydata/storage/backends/base.py hunk ./src/allmydata/storage/backends/base.py 1 + +from twisted.application import service + +from allmydata.util.interfaceutil import implements +from allmydata.interfaces import IShareSet +from allmydata.storage.common import si_b2a +from allmydata.storage.lease import LeaseInfo +from allmydata.storage.bucket import BucketReader + + +class Backend(service.MultiService): + def __init__(self): + service.MultiService.__init__(self) + + +class ShareSet(object): + implements(IShareSet) + """ + This class implements shareset logic that could work for all backends, but + might be useful to override for efficiency. 
def testv_and_readv_and_writev(self, storageserver, secrets,
                               test_and_write_vectors, read_vector,
                               expiration_time):
    """
    Test, read and (only if every test passes) write the mutable shares of
    this shareset.

    The implementation here depends on the following helper methods,
    which must be provided by subclasses:

      _clean_up_after_unlink(): clean up resources associated with the
          shareset after some shares might have been deleted.

      _create_mutable_share(storageserver, shnum, write_enabler): create
          a mutable share with the given shnum and write_enabler.

    @param storageserver: supplies get_nodeid() for the lease, is handed
        to _create_mutable_share(), and is used to log failed tests
    @param secrets=TupleOf(WriteEnablerSecret, LeaseRenewSecret[, ...])
    @param test_and_write_vectors=TestAndWriteVectorsForShares
    @param read_vector=ReadVector
    @param expiration_time=int
    @return TupleOf(bool, DictOf(int, ReadData))
    """
    # This previously had to be a triple with cancel_secret in secrets[2],
    # but we now allow the cancel_secret to be omitted.
    write_enabler = secrets[0]
    renew_secret = secrets[1]

    si_s = self.get_storage_index_string()
    shares = {}
    for share in self.get_shares():
        # XXX is ignoring immutable shares correct? Maybe get_shares should
        # have a parameter saying what type it's expecting.
        if share.sharetype == "mutable":
            share.check_write_enabler(write_enabler, si_s)
            shares[share.get_shnum()] = share

    # write_enabler is good for all existing shares.

    # Now evaluate test vectors.
    #
    # Failures are logged through the storage server: ShareSet itself has
    # no log() method, so the former self.log(...) calls raised
    # AttributeError exactly when a test vector failed.
    # NOTE(review): assumes storageserver exposes log(), as
    # MutableDiskShare.log expects of its parent -- confirm.
    testv_is_good = True
    for sharenum in test_and_write_vectors:
        (testv, datav, new_length) = test_and_write_vectors[sharenum]
        if sharenum in shares:
            if not shares[sharenum].check_testv(testv):
                storageserver.log("testv failed: [%d]: %r" % (sharenum, testv))
                testv_is_good = False
                break
        else:
            # compare the vectors against an empty share, in which all
            # reads return empty strings.
            if not EmptyShare().check_testv(testv):
                storageserver.log("testv failed (empty): [%d] %r" % (sharenum,
                                                                    testv))
                testv_is_good = False
                break

    # now gather the read vectors, before we do any writes
    read_data = {}
    for shnum, share in shares.items():
        read_data[shnum] = share.readv(read_vector)

    ownerid = 1 # TODO
    lease_info = LeaseInfo(ownerid, renew_secret,
                           expiration_time, storageserver.get_nodeid())

    if testv_is_good:
        # Remember whether any vector asked for a deletion. The old code
        # re-read the loop variable new_length after the loop, which only
        # reflected the last share visited and was undefined (NameError)
        # when test_and_write_vectors was empty.
        deletion_requested = False

        # now apply the write vectors
        for shnum in test_and_write_vectors:
            (testv, datav, new_length) = test_and_write_vectors[shnum]
            if new_length == 0:
                deletion_requested = True
                if shnum in shares:
                    shares[shnum].unlink()
            else:
                if shnum not in shares:
                    # allocate a new share
                    share = self._create_mutable_share(storageserver, shnum, write_enabler)
                    shares[shnum] = share
                shares[shnum].writev(datav, new_length)
                # and update the lease
                shares[shnum].add_or_renew_lease(lease_info)

        if deletion_requested:
            self._clean_up_after_unlink()

    return (testv_is_good, read_data)
+ + @param wanted_shnums=ListOf(int) + @param read_vector=ReadVector + @return DictOf(int, ReadData): shnum -> results, with one key per share + """ + datavs = {} + for share in self.get_shares(): + # XXX is ignoring immutable shares correct? Maybe get_shares should + # have a parameter saying what type it's expecting. + shnum = share.get_shnum() + if share.sharetype == "mutable" and (not wanted_shnums or shnum in wanted_shnums): + datavs[shnum] = share.readv(read_vector) + + return datavs + + +def testv_compare(a, op, b): + assert op in ("lt", "le", "eq", "ne", "ge", "gt") + if op == "lt": + return a < b + if op == "le": + return a <= b + if op == "eq": + return a == b + if op == "ne": + return a != b + if op == "ge": + return a >= b + if op == "gt": + return a > b + # never reached + + +class EmptyShare: + def check_testv(self, testv): + test_good = True + for (offset, length, operator, specimen) in testv: + data = "" + if not testv_compare(data, operator, specimen): + test_good = False + break + return test_good + addfile ./src/allmydata/storage/backends/disk/__init__.py addfile ./src/allmydata/storage/backends/disk/disk_backend.py hunk ./src/allmydata/storage/backends/disk/disk_backend.py 1 + +import re + +from twisted.python.filepath import FilePath, UnlistableError + +from allmydata.interfaceutil import implements +from allmydata.interfaces import IStorageBackend, IShareSet +from allmydata.util import fileutil, log, time_format +from allmydata.util.assertutil import precondition +from allmydata.storage.common import si_b2a, si_a2b +from allmydata.storage.immutable import BucketWriter +from allmydata.storage.backends.base import Backend, ShareSet +from allmydata.storage.backends.disk.immutable import ImmutableDiskShare +from allmydata.storage.backends.disk.mutable import MutableDiskShare, create_mutable_sharefile + +# storage/ +# storage/shares/incoming +# incoming/ holds temp dirs named $START/$STORAGEINDEX/$SHARENUM which will +# be moved to 
def get_share(fp):
    """
    Open the share file at fp, choosing the share class from its prefix.

    The storage index and share number are recovered from the path, which
    is expected to follow the layout described at the top of this module:
    storage/shares/$START/$STORAGEINDEX/$SHARENUM.

    @param fp=FilePath: location of an existing share file
    @return a MutableDiskShare if the file begins with
        MutableDiskShare.MAGIC, otherwise an ImmutableDiskShare
    """
    f = fp.open('rb')
    try:
        prefix = f.read(32)
    finally:
        f.close()

    # Both share classes are constructed as (storageindex, shnum, home);
    # passing only fp, as this function used to, raised TypeError.
    shnum = int(fp.basename())
    storageindex = si_a2b(fp.parent().basename())

    if prefix == MutableDiskShare.MAGIC:
        return MutableDiskShare(storageindex, shnum, fp)
    else:
        # assume it's immutable
        return ImmutableDiskShare(storageindex, shnum, fp)
def fill_in_space_stats(self, stats):
    """
    Add this backend's disk-space figures to the given stats dict.

    Sets the 'storage_server.disk_*' keys when disk statistics are
    available, and always sets 'storage_server.accepting_immutable_shares'
    (1 or 0).

    @param stats=dict: updated in place
    """
    try:
        disk = fileutil.get_disk_stats(self.sharedir, self.reserved_space)
        writeable = disk['avail'] > 0

        # spacetime predictors should use disk_avail / (d(disk_used)/dt)
        stats['storage_server.disk_total'] = disk['total']
        stats['storage_server.disk_used'] = disk['used']
        stats['storage_server.disk_free_for_root'] = disk['free_for_root']
        stats['storage_server.disk_free_for_nonroot'] = disk['free_for_nonroot']
        stats['storage_server.disk_avail'] = disk['avail']
    except AttributeError:
        # NOTE(review): presumably raised when the platform offers no
        # disk-statistics API (see the warning in _setup_storage) -- confirm.
        writeable = True
    except EnvironmentError:
        log.msg("OS call to get disk statistics failed", level=log.UNUSUAL)
        writeable = False

    # _setup_storage() stores the flag as self.readonly; the previous
    # self.readonly_storage is never assigned and raised AttributeError.
    if self.readonly:
        stats['storage_server.disk_avail'] = 0
        writeable = False

    stats['storage_server.accepting_immutable_shares'] = int(writeable)
class DiskShareSet(ShareSet):
    """
    The set of shares for one storage index, as stored by the disk backend.

    Completed shares live in sharehomedir; shares still being uploaded live
    in incominghomedir (which may be None).
    """
    implements(IShareSet)

    def __init__(self, storageindex, sharehomedir, incominghomedir=None):
        # The unbound base-class call needs an explicit self; without it
        # storageindex was taken as self and the call raised TypeError.
        ShareSet.__init__(self, storageindex)
        self.sharehomedir = sharehomedir
        self.incominghomedir = incominghomedir

    def get_overhead(self):
        """
        Return the combined disk usage of the share and incoming directories.
        """
        # NOTE(review): incominghomedir defaults to None; whether
        # fileutil.get_disk_usage accepts None is not visible here -- confirm.
        return (fileutil.get_disk_usage(self.sharehomedir) +
                fileutil.get_disk_usage(self.incominghomedir))

    def get_shares(self):
        """
        Generate IStoredShare objects for shares we have for this storage index.
        ("Shares we have" means completed ones, excluding incoming ones.)
        """
        try:
            for fp in self.sharehomedir.children():
                shnumstr = fp.basename()
                if not NUM_RE.match(shnumstr):
                    continue
                sharehome = self.sharehomedir.child(shnumstr)
                # get_share is the module-level factory; this class has no
                # get_share method, so self.get_share raised AttributeError.
                yield get_share(sharehome)
        except UnlistableError:
            # There is no shares directory at all.
            pass

    def get_incoming_shnums(self):
        """
        Return a frozenset of the shnum (as ints) of incoming shares.
        """
        if self.incominghomedir is None:
            return frozenset()
        try:
            childfps = [ fp for fp in self.incominghomedir.children() if NUM_RE.match(fp.basename()) ]
            shnums = [ int(fp.basename()) for fp in childfps]
            return frozenset(shnums)
        except UnlistableError:
            # There is no incoming directory at all.
            return frozenset()

    def make_bucket_writer(self, storageserver, shnum, max_space_per_bucket, lease_info, canary):
        """
        Create a new immutable share in the incoming directory and return a
        BucketWriter wrapping it.
        """
        sharehome = self.sharehomedir.child(str(shnum))
        incominghome = self.incominghomedir.child(str(shnum))
        immsh = ImmutableDiskShare(self.storageindex, shnum, sharehome, incominghome,
                                   max_size=max_space_per_bucket, create=True)
        bw = BucketWriter(storageserver, immsh, max_space_per_bucket, lease_info, canary)
        return bw

    def _create_mutable_share(self, storageserver, shnum, write_enabler):
        """
        Create a mutable share file for shnum, making the share directory
        first if necessary.
        """
        fileutil.fp_make_dirs(self.sharehomedir)
        sharehome = self.sharehomedir.child(str(shnum))
        nodeid = storageserver.get_nodeid()
        return create_mutable_sharefile(sharehome, nodeid, write_enabler, storageserver)

    def _clean_up_after_unlink(self):
        """
        Remove the share directory if unlinking shares has left it empty.
        """
        fileutil.fp_rmdir_if_empty(self.sharehomedir)
storage/shares/$SI/$SHNUM) contains lease information # and share data. The share data is accessed by RIBucketWriter.write and hunk ./src/allmydata/storage/backends/disk/immutable.py 41 # then the value stored in this field will be the actual share data length # modulo 2**32. -class ShareFile: - LEASE_SIZE = struct.calcsize(">L32s32sL") +class ImmutableDiskShare(object): + implements(IStoredShare) + sharetype = "immutable" hunk ./src/allmydata/storage/backends/disk/immutable.py 45 + LEASE_SIZE = struct.calcsize(">L32s32sL") + hunk ./src/allmydata/storage/backends/disk/immutable.py 48 - def __init__(self, filename, max_size=None, create=False): - """ If max_size is not None then I won't allow more than max_size to be written to me. If create=True and max_size must not be None. """ + def __init__(self, storageindex, shnum, finalhome=None, incominghome=None, max_size=None, create=False): + """ If max_size is not None then I won't allow more than + max_size to be written to me. If create=True then max_size + must not be None. """ precondition((max_size is not None) or (not create), max_size, create) hunk ./src/allmydata/storage/backends/disk/immutable.py 53 - self.home = filename + self.storageindex = storageindex self._max_size = max_size hunk ./src/allmydata/storage/backends/disk/immutable.py 55 + self.incominghome = incominghome + self.finalhome = finalhome + self.shnum = shnum if create: # touch the file, so later callers will see that we're working on # it. Also construct the metadata. 
def __repr__(self):
    """
    Return a debugging description naming the storage index, share number
    and final location of this share.
    """
    # The format string was empty while three values were supplied, so
    # "%"-formatting raised TypeError ("not all arguments converted").
    return ("<ImmutableDiskShare %s:%r at %s>"
            % (si_b2a(self.storageindex), self.shnum, quote_filepath(self.finalhome)))
+ # We try to delete the parent (.../ab/abcde) to avoid leaving + # these directories lying around forever, but the delete might + # fail if we're working on another share for the same storage + # index (like ab/abcde/5). The alternative approach would be to + # use a hierarchy of objects (PrefixHolder, BucketHolder, + # ShareWriter), each of which is responsible for a single + # directory on disk, and have them use reference counting of + # their children to know when they should do the rmdir. This + # approach is simpler, but relies on os.rmdir refusing to delete + # a non-empty directory. Do *not* use fileutil.rm_dir() here! + fileutil.fp_rmdir_if_empty(self.incominghome.parent()) + # we also delete the grandparent (prefix) directory, .../ab , + # again to avoid leaving directories lying around. This might + # fail if there is another bucket open that shares a prefix (like + # ab/abfff). + fileutil.fp_rmdir_if_empty(self.incominghome.parent().parent()) + # we leave the great-grandparent (incoming/) directory in place. + except EnvironmentError: + # ignore the "can't rmdir because the directory is not empty" + # exceptions, those are normal consequences of the + # above-mentioned conditions. + pass + pass + + def get_used_space(self): + return (fileutil.get_used_space(self.finalhome) + + fileutil.get_used_space(self.incominghome)) + + def get_shnum(self): + return self.shnum + def unlink(self): hunk ./src/allmydata/storage/backends/disk/immutable.py 131 - os.unlink(self.home) + self.finalhome.remove() def read_share_data(self, offset, length): precondition(offset >= 0) hunk ./src/allmydata/storage/backends/disk/immutable.py 135 - # reads beyond the end of the data are truncated. Reads that start + + # Reads beyond the end of the data are truncated. Reads that start # beyond the end of the data return an empty string. 
seekpos = self._data_offset+offset actuallength = max(0, min(length, self._lease_offset-seekpos)) hunk ./src/allmydata/storage/backends/disk/immutable.py 142 if actuallength == 0: return "" - f = open(self.home, 'rb') - f.seek(seekpos) - return f.read(actuallength) + f = self.finalhome.open(mode='rb') + try: + f.seek(seekpos) + sharedata = f.read(actuallength) + finally: + f.close() + return sharedata def write_share_data(self, offset, data): length = len(data) hunk ./src/allmydata/storage/backends/disk/immutable.py 155 precondition(offset >= 0, offset) if self._max_size is not None and offset+length > self._max_size: raise DataTooLargeError(self._max_size, offset, length) - f = open(self.home, 'rb+') - real_offset = self._data_offset+offset - f.seek(real_offset) - assert f.tell() == real_offset - f.write(data) - f.close() + f = self.incominghome.open(mode='rb+') + try: + real_offset = self._data_offset+offset + f.seek(real_offset) + assert f.tell() == real_offset + f.write(data) + finally: + f.close() def _write_lease_record(self, f, lease_number, lease_info): offset = self._lease_offset + lease_number * self.LEASE_SIZE hunk ./src/allmydata/storage/backends/disk/immutable.py 172 def _read_num_leases(self, f): f.seek(0x08) - (num_leases,) = struct.unpack(">L", f.read(4)) + ro = f.read(4) + (num_leases,) = struct.unpack(">L", ro) return num_leases def _write_num_leases(self, f, num_leases): hunk ./src/allmydata/storage/backends/disk/immutable.py 183 def _truncate_leases(self, f, num_leases): f.truncate(self._lease_offset + num_leases * self.LEASE_SIZE) + # These lease operations are intended for use by disk_backend.py. + # Other clients should not depend on the fact that the disk backend + # stores leases in share files. 
def add_lease(self, lease_info):
    """
    Append a lease record to the share file and bump its lease count.

    @param lease_info=LeaseInfo
    """
    # A share sits at incominghome until close() moves it to finalhome, so
    # operate on whichever file currently exists.
    home = self.finalhome
    if self.incominghome is not None and self.incominghome.exists():
        home = self.incominghome

    # Open for update ('rb+'): the previous 'wb+' truncated the share, and
    # _read_num_leases() was handed a FilePath although it needs an open
    # file to seek and read.
    f = home.open(mode='rb+')
    try:
        num_leases = self._read_num_leases(f)
        self._write_lease_record(f, num_leases, lease_info)
        self._write_num_leases(f, num_leases+1)
    finally:
        f.close()
if new_expire_time > lease.expiration_time: hunk ./src/allmydata/storage/backends/disk/immutable.py 216 # yes lease.expiration_time = new_expire_time - f = open(self.home, 'rb+') - self._write_lease_record(f, i, lease) - f.close() + f = self.finalhome.open('rb+') + try: + self._write_lease_record(f, i, lease) + finally: + f.close() return raise IndexError("unable to renew non-existent lease") hunk ./src/allmydata/storage/backends/disk/immutable.py 230 lease_info.expiration_time) except IndexError: self.add_lease(lease_info) - - - def cancel_lease(self, cancel_secret): - """Remove a lease with the given cancel_secret. If the last lease is - cancelled, the file will be removed. Return the number of bytes that - were freed (by truncating the list of leases, and possibly by - deleting the file. Raise IndexError if there was no lease with the - given cancel_secret. - """ - - leases = list(self.get_leases()) - num_leases_removed = 0 - for i,lease in enumerate(leases): - if constant_time_compare(lease.cancel_secret, cancel_secret): - leases[i] = None - num_leases_removed += 1 - if not num_leases_removed: - raise IndexError("unable to find matching lease to cancel") - if num_leases_removed: - # pack and write out the remaining leases. We write these out in - # the same order as they were added, so that if we crash while - # doing this, we won't lose any non-cancelled leases. 
- leases = [l for l in leases if l] # remove the cancelled leases - f = open(self.home, 'rb+') - for i,lease in enumerate(leases): - self._write_lease_record(f, i, lease) - self._write_num_leases(f, len(leases)) - self._truncate_leases(f, len(leases)) - f.close() - space_freed = self.LEASE_SIZE * num_leases_removed - if not len(leases): - space_freed += os.stat(self.home)[stat.ST_SIZE] - self.unlink() - return space_freed - - -class BucketWriter(Referenceable): - implements(RIBucketWriter) - - def __init__(self, ss, incominghome, finalhome, max_size, lease_info, canary): - self.ss = ss - self.incominghome = incominghome - self.finalhome = finalhome - self._max_size = max_size # don't allow the client to write more than this - self._canary = canary - self._disconnect_marker = canary.notifyOnDisconnect(self._disconnected) - self.closed = False - self.throw_out_all_data = False - self._sharefile = ShareFile(incominghome, create=True, max_size=max_size) - # also, add our lease to the file now, so that other ones can be - # added by simultaneous uploaders - self._sharefile.add_lease(lease_info) - - def allocated_size(self): - return self._max_size - - def remote_write(self, offset, data): - start = time.time() - precondition(not self.closed) - if self.throw_out_all_data: - return - self._sharefile.write_share_data(offset, data) - self.ss.add_latency("write", time.time() - start) - self.ss.count("write") - - def remote_close(self): - precondition(not self.closed) - start = time.time() - - fileutil.make_dirs(os.path.dirname(self.finalhome)) - fileutil.rename(self.incominghome, self.finalhome) - try: - # self.incominghome is like storage/shares/incoming/ab/abcde/4 . - # We try to delete the parent (.../ab/abcde) to avoid leaving - # these directories lying around forever, but the delete might - # fail if we're working on another share for the same storage - # index (like ab/abcde/5). 
The alternative approach would be to - # use a hierarchy of objects (PrefixHolder, BucketHolder, - # ShareWriter), each of which is responsible for a single - # directory on disk, and have them use reference counting of - # their children to know when they should do the rmdir. This - # approach is simpler, but relies on os.rmdir refusing to delete - # a non-empty directory. Do *not* use fileutil.rm_dir() here! - os.rmdir(os.path.dirname(self.incominghome)) - # we also delete the grandparent (prefix) directory, .../ab , - # again to avoid leaving directories lying around. This might - # fail if there is another bucket open that shares a prefix (like - # ab/abfff). - os.rmdir(os.path.dirname(os.path.dirname(self.incominghome))) - # we leave the great-grandparent (incoming/) directory in place. - except EnvironmentError: - # ignore the "can't rmdir because the directory is not empty" - # exceptions, those are normal consequences of the - # above-mentioned conditions. - pass - self._sharefile = None - self.closed = True - self._canary.dontNotifyOnDisconnect(self._disconnect_marker) - - filelen = os.stat(self.finalhome)[stat.ST_SIZE] - self.ss.bucket_writer_closed(self, filelen) - self.ss.add_latency("close", time.time() - start) - self.ss.count("close") - - def _disconnected(self): - if not self.closed: - self._abort() - - def remote_abort(self): - log.msg("storage: aborting sharefile %s" % self.incominghome, - facility="tahoe.storage", level=log.UNUSUAL) - if not self.closed: - self._canary.dontNotifyOnDisconnect(self._disconnect_marker) - self._abort() - self.ss.count("abort") - - def _abort(self): - if self.closed: - return - - os.remove(self.incominghome) - # if we were the last share to be moved, remove the incoming/ - # directory that was our parent - parentdir = os.path.split(self.incominghome)[0] - if not os.listdir(parentdir): - os.rmdir(parentdir) - self._sharefile = None - - # We are now considered closed for further writing. 
We must tell - # the storage server about this so that it stops expecting us to - # use the space it allocated for us earlier. - self.closed = True - self.ss.bucket_writer_closed(self, 0) - - -class BucketReader(Referenceable): - implements(RIBucketReader) - - def __init__(self, ss, sharefname, storage_index=None, shnum=None): - self.ss = ss - self._share_file = ShareFile(sharefname) - self.storage_index = storage_index - self.shnum = shnum - - def __repr__(self): - return "<%s %s %s>" % (self.__class__.__name__, - base32.b2a_l(self.storage_index[:8], 60), - self.shnum) - - def remote_read(self, offset, length): - start = time.time() - data = self._share_file.read_share_data(offset, length) - self.ss.add_latency("read", time.time() - start) - self.ss.count("read") - return data - - def remote_advise_corrupt_share(self, reason): - return self.ss.remote_advise_corrupt_share("immutable", - self.storage_index, - self.shnum, - reason) hunk ./src/allmydata/storage/backends/disk/mutable.py 1 -import os, stat, struct hunk ./src/allmydata/storage/backends/disk/mutable.py 2 -from allmydata.interfaces import BadWriteEnablerError -from allmydata.util import idlib, log +import struct + +from allmydata.interfaceutil import implements +from allmydata.interfaces import IStoredShare, BadWriteEnablerError +from allmydata.util import fileutil, idlib, log from allmydata.util.assertutil import precondition from allmydata.util.hashutil import constant_time_compare hunk ./src/allmydata/storage/backends/disk/mutable.py 9 -from allmydata.storage.lease import LeaseInfo -from allmydata.storage.common import UnknownMutableContainerVersionError, \ +from allmydata.util.encodingutil import quote_filepath +from allmydata.storage.common import si_b2a, UnknownMutableContainerVersionError, \ DataTooLargeError hunk ./src/allmydata/storage/backends/disk/mutable.py 12 +from allmydata.storage.lease import LeaseInfo +from allmydata.storage.backends.base import testv_compare + hunk 
./src/allmydata/storage/backends/disk/mutable.py 16 -# the MutableShareFile is like the ShareFile, but used for mutable data. It -# has a different layout. See docs/mutable.txt for more details. +# The MutableDiskShare is like the ImmutableDiskShare, but used for mutable data. +# It has a different layout. See docs/mutable.rst for more details. # # offset size name # 1 0 32 magic verstr "tahoe mutable container v1" plus binary hunk ./src/allmydata/storage/backends/disk/mutable.py 30 # 4 4 expiration timestamp # 8 32 renewal token # 40 32 cancel token -# 72 20 nodeid which accepted the tokens +# 72 20 nodeid that accepted the tokens # 7 468 (a) data # 8 ?? 4 count of extra leases # 9 ?? n*92 extra leases hunk ./src/allmydata/storage/backends/disk/mutable.py 36 -# The struct module doc says that L's are 4 bytes in size., and that Q's are +# The struct module doc says that L's are 4 bytes in size, and that Q's are # 8 bytes in size. Since compatibility depends upon this, double-check it. assert struct.calcsize(">L") == 4, struct.calcsize(">L") assert struct.calcsize(">Q") == 8, struct.calcsize(">Q") hunk ./src/allmydata/storage/backends/disk/mutable.py 41 -class MutableShareFile: + +class MutableDiskShare(object): + implements(IStoredShare) sharetype = "mutable" DATA_LENGTH_OFFSET = struct.calcsize(">32s20s32s") hunk ./src/allmydata/storage/backends/disk/mutable.py 53 assert LEASE_SIZE == 92 DATA_OFFSET = HEADER_SIZE + 4*LEASE_SIZE assert DATA_OFFSET == 468, DATA_OFFSET + # our sharefiles share with a recognizable string, plus some random # binary data to reduce the chance that a regular text file will look # like a sharefile. 
hunk ./src/allmydata/storage/backends/disk/mutable.py 62 MAX_SIZE = 2*1000*1000*1000 # 2GB, kind of arbitrary # TODO: decide upon a policy for max share size - def __init__(self, filename, parent=None): - self.home = filename - if os.path.exists(self.home): + def __init__(self, storageindex, shnum, home, parent=None): + self.storageindex = storageindex + self.shnum = shnum + self.home = home + if self.home.exists(): # we don't cache anything, just check the magic hunk ./src/allmydata/storage/backends/disk/mutable.py 68 - f = open(self.home, 'rb') - data = f.read(self.HEADER_SIZE) - (magic, - write_enabler_nodeid, write_enabler, - data_length, extra_least_offset) = \ - struct.unpack(">32s20s32sQQ", data) - if magic != self.MAGIC: - msg = "sharefile %s had magic '%r' but we wanted '%r'" % \ - (filename, magic, self.MAGIC) - raise UnknownMutableContainerVersionError(msg) + f = self.home.open('rb') + try: + data = f.read(self.HEADER_SIZE) + (magic, + write_enabler_nodeid, write_enabler, + data_length, extra_least_offset) = \ + struct.unpack(">32s20s32sQQ", data) + if magic != self.MAGIC: + msg = "sharefile %s had magic '%r' but we wanted '%r'" % \ + (quote_filepath(self.home), magic, self.MAGIC) + raise UnknownMutableContainerVersionError(msg) + finally: + f.close() self.parent = parent # for logging def log(self, *args, **kwargs): hunk ./src/allmydata/storage/backends/disk/mutable.py 87 return self.parent.log(*args, **kwargs) def create(self, my_nodeid, write_enabler): - assert not os.path.exists(self.home) + assert not self.home.exists() data_length = 0 extra_lease_offset = (self.HEADER_SIZE + 4 * self.LEASE_SIZE hunk ./src/allmydata/storage/backends/disk/mutable.py 94 + data_length) assert extra_lease_offset == self.DATA_OFFSET # true at creation num_extra_leases = 0 - f = open(self.home, 'wb') - header = struct.pack(">32s20s32sQQ", - self.MAGIC, my_nodeid, write_enabler, - data_length, extra_lease_offset, - ) - leases = ("\x00"*self.LEASE_SIZE) * 4 - f.write(header 
+ leases) - # data goes here, empty after creation - f.write(struct.pack(">L", num_extra_leases)) - # extra leases go here, none at creation - f.close() + f = self.home.open('wb') + try: + header = struct.pack(">32s20s32sQQ", + self.MAGIC, my_nodeid, write_enabler, + data_length, extra_lease_offset, + ) + leases = ("\x00"*self.LEASE_SIZE) * 4 + f.write(header + leases) + # data goes here, empty after creation + f.write(struct.pack(">L", num_extra_leases)) + # extra leases go here, none at creation + finally: + f.close() + + def __repr__(self): + return ("" + % (si_b2a(self.storageindex), self.shnum, quote_filepath(self.home))) + + def get_used_space(self): + return fileutil.get_used_space(self.home) + + def get_shnum(self): + return self.shnum def unlink(self): hunk ./src/allmydata/storage/backends/disk/mutable.py 119 - os.unlink(self.home) + self.home.remove() def _read_data_length(self, f): f.seek(self.DATA_LENGTH_OFFSET) hunk ./src/allmydata/storage/backends/disk/mutable.py 287 def get_leases(self): """Yields a LeaseInfo instance for all leases.""" - f = open(self.home, 'rb') - for i, lease in self._enumerate_leases(f): - yield lease - f.close() + f = self.home.open('rb') + try: + for i, lease in self._enumerate_leases(f): + yield lease + finally: + f.close() def _enumerate_leases(self, f): for i in range(self._get_num_lease_slots(f)): hunk ./src/allmydata/storage/backends/disk/mutable.py 299 try: data = self._read_lease_record(f, i) if data is not None: - yield i,data + yield i, data except IndexError: return hunk ./src/allmydata/storage/backends/disk/mutable.py 303 + # These lease operations are intended for use by disk_backend.py. + # Other clients should not depend on the fact that the disk backend + # stores leases in share files. 
+ def add_lease(self, lease_info): precondition(lease_info.owner_num != 0) # 0 means "no lease here" hunk ./src/allmydata/storage/backends/disk/mutable.py 309 - f = open(self.home, 'rb+') - num_lease_slots = self._get_num_lease_slots(f) - empty_slot = self._get_first_empty_lease_slot(f) - if empty_slot is not None: - self._write_lease_record(f, empty_slot, lease_info) - else: - self._write_lease_record(f, num_lease_slots, lease_info) - f.close() + f = self.home.open('rb+') + try: + num_lease_slots = self._get_num_lease_slots(f) + empty_slot = self._get_first_empty_lease_slot(f) + if empty_slot is not None: + self._write_lease_record(f, empty_slot, lease_info) + else: + self._write_lease_record(f, num_lease_slots, lease_info) + finally: + f.close() def renew_lease(self, renew_secret, new_expire_time): accepting_nodeids = set() hunk ./src/allmydata/storage/backends/disk/mutable.py 322 - f = open(self.home, 'rb+') - for (leasenum,lease) in self._enumerate_leases(f): - if constant_time_compare(lease.renew_secret, renew_secret): - # yup. See if we need to update the owner time. - if new_expire_time > lease.expiration_time: - # yes - lease.expiration_time = new_expire_time - self._write_lease_record(f, leasenum, lease) - f.close() - return - accepting_nodeids.add(lease.nodeid) - f.close() + f = self.home.open('rb+') + try: + for (leasenum, lease) in self._enumerate_leases(f): + if constant_time_compare(lease.renew_secret, renew_secret): + # yup. See if we need to update the owner time. 
+ if new_expire_time > lease.expiration_time: + # yes + lease.expiration_time = new_expire_time + self._write_lease_record(f, leasenum, lease) + return + accepting_nodeids.add(lease.nodeid) + finally: + f.close() # Return the accepting_nodeids set, to give the client a chance to hunk ./src/allmydata/storage/backends/disk/mutable.py 336 - # update the leases on a share which has been migrated from its + # update the leases on a share that has been migrated from its # original server to a new one. msg = ("Unable to renew non-existent lease. I have leases accepted by" " nodeids: ") hunk ./src/allmydata/storage/backends/disk/mutable.py 353 except IndexError: self.add_lease(lease_info) - def cancel_lease(self, cancel_secret): - """Remove any leases with the given cancel_secret. If the last lease - is cancelled, the file will be removed. Return the number of bytes - that were freed (by truncating the list of leases, and possibly by - deleting the file. Raise IndexError if there was no lease with the - given cancel_secret.""" - - accepting_nodeids = set() - modified = 0 - remaining = 0 - blank_lease = LeaseInfo(owner_num=0, - renew_secret="\x00"*32, - cancel_secret="\x00"*32, - expiration_time=0, - nodeid="\x00"*20) - f = open(self.home, 'rb+') - for (leasenum,lease) in self._enumerate_leases(f): - accepting_nodeids.add(lease.nodeid) - if constant_time_compare(lease.cancel_secret, cancel_secret): - self._write_lease_record(f, leasenum, blank_lease) - modified += 1 - else: - remaining += 1 - if modified: - freed_space = self._pack_leases(f) - f.close() - if not remaining: - freed_space += os.stat(self.home)[stat.ST_SIZE] - self.unlink() - return freed_space - - msg = ("Unable to cancel non-existent lease. I have leases " - "accepted by nodeids: ") - msg += ",".join([("'%s'" % idlib.nodeid_b2a(anid)) - for anid in accepting_nodeids]) - msg += " ." 
- raise IndexError(msg) - - def _pack_leases(self, f): - # TODO: reclaim space from cancelled leases - return 0 - def _read_write_enabler_and_nodeid(self, f): f.seek(0) data = f.read(self.HEADER_SIZE) hunk ./src/allmydata/storage/backends/disk/mutable.py 365 def readv(self, readv): datav = [] - f = open(self.home, 'rb') - for (offset, length) in readv: - datav.append(self._read_share_data(f, offset, length)) - f.close() + f = self.home.open('rb') + try: + for (offset, length) in readv: + datav.append(self._read_share_data(f, offset, length)) + finally: + f.close() return datav hunk ./src/allmydata/storage/backends/disk/mutable.py 373 -# def remote_get_length(self): -# f = open(self.home, 'rb') -# data_length = self._read_data_length(f) -# f.close() -# return data_length + def get_data_length(self): + f = self.home.open('rb') + try: + data_length = self._read_data_length(f) + finally: + f.close() + return data_length def check_write_enabler(self, write_enabler, si_s): hunk ./src/allmydata/storage/backends/disk/mutable.py 382 - f = open(self.home, 'rb+') - (real_write_enabler, write_enabler_nodeid) = \ - self._read_write_enabler_and_nodeid(f) - f.close() + f = self.home.open('rb+') + try: + (real_write_enabler, write_enabler_nodeid) = self._read_write_enabler_and_nodeid(f) + finally: + f.close() # avoid a timing attack #if write_enabler != real_write_enabler: if not constant_time_compare(write_enabler, real_write_enabler): hunk ./src/allmydata/storage/backends/disk/mutable.py 403 def check_testv(self, testv): test_good = True - f = open(self.home, 'rb+') - for (offset, length, operator, specimen) in testv: - data = self._read_share_data(f, offset, length) - if not testv_compare(data, operator, specimen): - test_good = False - break - f.close() + f = self.home.open('rb+') + try: + for (offset, length, operator, specimen) in testv: + data = self._read_share_data(f, offset, length) + if not testv_compare(data, operator, specimen): + test_good = False + break + finally: 
+ f.close() return test_good def writev(self, datav, new_length): hunk ./src/allmydata/storage/backends/disk/mutable.py 415 - f = open(self.home, 'rb+') - for (offset, data) in datav: - self._write_share_data(f, offset, data) - if new_length is not None: - cur_length = self._read_data_length(f) - if new_length < cur_length: - self._write_data_length(f, new_length) - # TODO: if we're going to shrink the share file when the - # share data has shrunk, then call - # self._change_container_size() here. - f.close() - -def testv_compare(a, op, b): - assert op in ("lt", "le", "eq", "ne", "ge", "gt") - if op == "lt": - return a < b - if op == "le": - return a <= b - if op == "eq": - return a == b - if op == "ne": - return a != b - if op == "ge": - return a >= b - if op == "gt": - return a > b - # never reached - -class EmptyShare: + f = self.home.open('rb+') + try: + for (offset, data) in datav: + self._write_share_data(f, offset, data) + if new_length is not None: + cur_length = self._read_data_length(f) + if new_length < cur_length: + self._write_data_length(f, new_length) + # TODO: if we're going to shrink the share file when the + # share data has shrunk, then call + # self._change_container_size() here. 
+ finally: + f.close() hunk ./src/allmydata/storage/backends/disk/mutable.py 429 - def check_testv(self, testv): - test_good = True - for (offset, length, operator, specimen) in testv: - data = "" - if not testv_compare(data, operator, specimen): - test_good = False - break - return test_good hunk ./src/allmydata/storage/backends/disk/mutable.py 430 -def create_mutable_sharefile(filename, my_nodeid, write_enabler, parent): - ms = MutableShareFile(filename, parent) - ms.create(my_nodeid, write_enabler) +def create_mutable_disk_share(fp, nodeid, write_enabler, parent): + ms = MutableDiskShare(fp, parent) + ms.create(nodeid, write_enabler) del ms hunk ./src/allmydata/storage/backends/disk/mutable.py 434 - return MutableShareFile(filename, parent) - + return MutableDiskShare(fp, parent) addfile ./src/allmydata/storage/backends/null/__init__.py addfile ./src/allmydata/storage/backends/null/core.py hunk ./src/allmydata/storage/backends/null/core.py 2 +import os, struct + +from allmydata.interfaceutil import implements +from allmydata.interfaces import IStorageBackend, IShareSet, IStoredShare +from allmydata.util.assertutil import precondition +from allmydata.util.hashutil import constant_time_compare +from allmydata.storage.backends.base import Backend +from allmydata.storage.bucket import BucketWriter +from allmydata.storage.common import si_b2a +from allmydata.storage.lease import LeaseInfo + + +class NullCore(Backend): + implements(IStorageBackend) + + def __init__(self): + Backend.__init__(self) + + def get_available_space(self): + return None + + def get_shareset(self, storageindex): + return NullShareSet(storageindex) + + def set_storage_server(self, ss): + self.ss = ss + + def advise_corrupt_share(self, storageindex, sharetype, shnum, reason): + pass + + +class NullShareSet(object): + implements(IShareSet) + + def __init__(self, storageindex): + self.storageindex = storageindex + + def get_overhead(self): + return 0 + + def get_incoming_shnums(self): + return 
frozenset() + + def get_shares(self): + pass + + def get_share(self, shnum): + return None + + def get_storage_index(self): + return self.storageindex + + def get_storage_index_string(self): + return si_b2a(self.storageindex) + + def make_bucket_writer(self, shnum, max_space_per_bucket, lease_info, canary): + immutableshare = ImmutableNullShare() + return BucketWriter(self.ss, immutableshare, max_space_per_bucket, lease_info, canary) + + +class ImmutableNullShare: + implements(IStoredShare) + sharetype = "immutable" + + def __init__(self): + """ If max_size is not None then I won't allow more than + max_size to be written to me. If create=True then max_size + must not be None. """ + pass + + def get_shnum(self): + return self.shnum + + def unlink(self): + os.unlink(self.fname) + + def read_share_data(self, offset, length): + precondition(offset >= 0) + # Reads beyond the end of the data are truncated. Reads that start + # beyond the end of the data return an empty string. + seekpos = self._data_offset+offset + fsize = os.path.getsize(self.fname) + actuallength = max(0, min(length, fsize-seekpos)) # XXX #1528 + if actuallength == 0: + return "" + f = open(self.fname, 'rb') + f.seek(seekpos) + return f.read(actuallength) + + def write_share_data(self, offset, data): + pass + + def _write_lease_record(self, f, lease_number, lease_info): + offset = self._lease_offset + lease_number * self.LEASE_SIZE + f.seek(offset) + assert f.tell() == offset + f.write(lease_info.to_immutable_data()) + + def _read_num_leases(self, f): + f.seek(0x08) + (num_leases,) = struct.unpack(">L", f.read(4)) + return num_leases + + def _write_num_leases(self, f, num_leases): + f.seek(0x08) + f.write(struct.pack(">L", num_leases)) + + def _truncate_leases(self, f, num_leases): + f.truncate(self._lease_offset + num_leases * self.LEASE_SIZE) + + def get_leases(self): + """Yields a LeaseInfo instance for all leases.""" + f = open(self.fname, 'rb') + (version, unused, num_leases) = 
struct.unpack(">LLL", f.read(0xc)) + f.seek(self._lease_offset) + for i in range(num_leases): + data = f.read(self.LEASE_SIZE) + if data: + yield LeaseInfo().from_immutable_data(data) + + def add_lease(self, lease): + pass + + def renew_lease(self, renew_secret, new_expire_time): + for i,lease in enumerate(self.get_leases()): + if constant_time_compare(lease.renew_secret, renew_secret): + # yup. See if we need to update the owner time. + if new_expire_time > lease.expiration_time: + # yes + lease.expiration_time = new_expire_time + f = open(self.fname, 'rb+') + self._write_lease_record(f, i, lease) + f.close() + return + raise IndexError("unable to renew non-existent lease") + + def add_or_renew_lease(self, lease_info): + try: + self.renew_lease(lease_info.renew_secret, + lease_info.expiration_time) + except IndexError: + self.add_lease(lease_info) + + +class MutableNullShare: + implements(IStoredShare) + sharetype = "mutable" + + """ XXX: TODO """ addfile ./src/allmydata/storage/bucket.py hunk ./src/allmydata/storage/bucket.py 1 + +import time + +from foolscap.api import Referenceable + +from zope.interface import implements +from allmydata.interfaces import RIBucketWriter, RIBucketReader +from allmydata.util import base32, log +from allmydata.util.assertutil import precondition + + +class BucketWriter(Referenceable): + implements(RIBucketWriter) + + def __init__(self, ss, immutableshare, max_size, lease_info, canary): + self.ss = ss + self._max_size = max_size # don't allow the client to write more than this + self._canary = canary + self._disconnect_marker = canary.notifyOnDisconnect(self._disconnected) + self.closed = False + self.throw_out_all_data = False + self._share = immutableshare + # also, add our lease to the file now, so that other ones can be + # added by simultaneous uploaders + self._share.add_lease(lease_info) + + def allocated_size(self): + return self._max_size + + def remote_write(self, offset, data): + start = time.time() + precondition(not 
self.closed) + if self.throw_out_all_data: + return + self._share.write_share_data(offset, data) + self.ss.add_latency("write", time.time() - start) + self.ss.count("write") + + def remote_close(self): + precondition(not self.closed) + start = time.time() + + self._share.close() + filelen = self._share.stat() + self._share = None + + self.closed = True + self._canary.dontNotifyOnDisconnect(self._disconnect_marker) + + self.ss.bucket_writer_closed(self, filelen) + self.ss.add_latency("close", time.time() - start) + self.ss.count("close") + + def _disconnected(self): + if not self.closed: + self._abort() + + def remote_abort(self): + log.msg("storage: aborting write to share %r" % self._share, + facility="tahoe.storage", level=log.UNUSUAL) + if not self.closed: + self._canary.dontNotifyOnDisconnect(self._disconnect_marker) + self._abort() + self.ss.count("abort") + + def _abort(self): + if self.closed: + return + self._share.unlink() + self._share = None + + # We are now considered closed for further writing. We must tell + # the storage server about this so that it stops expecting us to + # use the space it allocated for us earlier. 
+ self.closed = True + self.ss.bucket_writer_closed(self, 0) + + +class BucketReader(Referenceable): + implements(RIBucketReader) + + def __init__(self, ss, share): + self.ss = ss + self._share = share + self.storageindex = share.storageindex + self.shnum = share.shnum + + def __repr__(self): + return "<%s %s %s>" % (self.__class__.__name__, + base32.b2a_l(self.storageindex[:8], 60), + self.shnum) + + def remote_read(self, offset, length): + start = time.time() + data = self._share.read_share_data(offset, length) + self.ss.add_latency("read", time.time() - start) + self.ss.count("read") + return data + + def remote_advise_corrupt_share(self, reason): + return self.ss.remote_advise_corrupt_share("immutable", + self.storageindex, + self.shnum, + reason) hunk ./src/allmydata/storage/common.py 1 - -import os.path from allmydata.util import base32 class DataTooLargeError(Exception): hunk ./src/allmydata/storage/common.py 5 pass + class UnknownMutableContainerVersionError(Exception): pass hunk ./src/allmydata/storage/common.py 8 + class UnknownImmutableContainerVersionError(Exception): pass hunk ./src/allmydata/storage/common.py 18 def si_a2b(ascii_storageindex): return base32.a2b(ascii_storageindex) - -def storage_index_to_dir(storageindex): - sia = si_b2a(storageindex) - return os.path.join(sia[:2], sia) hunk ./src/allmydata/storage/crawler.py 2 -import os, time, struct +import time, struct import cPickle as pickle from twisted.internet import reactor from twisted.application import service hunk ./src/allmydata/storage/crawler.py 7 from allmydata.storage.common import si_b2a -from allmydata.util import fileutil + class TimeSliceExceeded(Exception): pass hunk ./src/allmydata/storage/crawler.py 12 + class ShareCrawler(service.MultiService): hunk ./src/allmydata/storage/crawler.py 14 - """A ShareCrawler subclass is attached to a StorageServer, and - periodically walks all of its shares, processing each one in some - fashion. 
This crawl is rate-limited, to reduce the IO burden on the host, - since large servers can easily have a terabyte of shares, in several - million files, which can take hours or days to read. + """ + An instance of a subclass of ShareCrawler is attached to a storage + backend, and periodically walks the backend's shares, processing them + in some fashion. This crawl is rate-limited to reduce the I/O burden on + the host, since large servers can easily have a terabyte of shares in + several million files, which can take hours or days to read. Once the crawler starts a cycle, it will proceed at a rate limited by the allowed_cpu_percentage= and cpu_slice= parameters: yielding the reactor hunk ./src/allmydata/storage/crawler.py 30 long enough to ensure that 'minimum_cycle_time' elapses between the start of two consecutive cycles. - We assume that the normal upload/download/get_buckets traffic of a tahoe + We assume that the normal upload/download/DYHB traffic of a Tahoe-LAFS grid will cause the prefixdir contents to be mostly cached in the kernel, hunk ./src/allmydata/storage/crawler.py 32 - or that the number of buckets in each prefixdir will be small enough to - load quickly. A 1TB allmydata.com server was measured to have 2.56M - buckets, spread into the 1024 prefixdirs, with about 2500 buckets per + or that the number of sharesets in each prefixdir will be small enough to + load quickly. A 1TB allmydata.com server was measured to have 2.56 million + sharesets, spread into the 1024 prefixdirs, with about 2500 sharesets per prefix. On this server, each prefixdir took 130ms-200ms to list the first time, and 17ms to list the second time. hunk ./src/allmydata/storage/crawler.py 38 - To use a crawler, create a subclass which implements the process_bucket() - method. It will be called with a prefixdir and a base32 storage index - string. process_bucket() must run synchronously. Any keys added to - self.state will be preserved. 
Override add_initial_state() to set up - initial state keys. Override finished_cycle() to perform additional - processing when the cycle is complete. Any status that the crawler - produces should be put in the self.state dictionary. Status renderers - (like a web page which describes the accomplishments of your crawler) - will use crawler.get_state() to retrieve this dictionary; they can - present the contents as they see fit. + To implement a crawler, create a subclass that implements the + process_shareset() method. It will be called with a prefixdir and an + object providing the IShareSet interface. process_shareset() must run + synchronously. Any keys added to self.state will be preserved. Override + add_initial_state() to set up initial state keys. Override + finished_cycle() to perform additional processing when the cycle is + complete. Any status that the crawler produces should be put in the + self.state dictionary. Status renderers (like a web page describing the + accomplishments of your crawler) will use crawler.get_state() to retrieve + this dictionary; they can present the contents as they see fit. hunk ./src/allmydata/storage/crawler.py 49 - Then create an instance, with a reference to a StorageServer and a - filename where it can store persistent state. The statefile is used to - keep track of how far around the ring the process has travelled, as well - as timing history to allow the pace to be predicted and controlled. The - statefile will be updated and written to disk after each time slice (just - before the crawler yields to the reactor), and also after each cycle is - finished, and also when stopService() is called. Note that this means - that a crawler which is interrupted with SIGKILL while it is in the - middle of a time slice will lose progress: the next time the node is - started, the crawler will repeat some unknown amount of work. 
+ Then create an instance, with a reference to a backend object providing + the IStorageBackend interface, and a filename where it can store + persistent state. The statefile is used to keep track of how far around + the ring the process has travelled, as well as timing history to allow + the pace to be predicted and controlled. The statefile will be updated + and written to disk after each time slice (just before the crawler yields + to the reactor), and also after each cycle is finished, and also when + stopService() is called. Note that this means that a crawler that is + interrupted with SIGKILL while it is in the middle of a time slice will + lose progress: the next time the node is started, the crawler will repeat + some unknown amount of work. The crawler instance must be started with startService() before it will hunk ./src/allmydata/storage/crawler.py 62 - do any work. To make it stop doing work, call stopService(). + do any work. To make it stop doing work, call stopService(). A crawler + is usually a child service of a StorageServer, although it should not + depend on that. + + For historical reasons, some dictionary key names use the term "bucket" + for what is now preferably called a "shareset" (the set of shares that a + server holds under a given storage index). 
""" slow_start = 300 # don't start crawling for 5 minutes after startup hunk ./src/allmydata/storage/crawler.py 77 cpu_slice = 1.0 # use up to 1.0 seconds before yielding minimum_cycle_time = 300 # don't run a cycle faster than this - def __init__(self, server, statefile, allowed_cpu_percentage=None): + def __init__(self, backend, statefp, allowed_cpu_percentage=None): service.MultiService.__init__(self) hunk ./src/allmydata/storage/crawler.py 79 + self.backend = backend + self.statefp = statefp if allowed_cpu_percentage is not None: self.allowed_cpu_percentage = allowed_cpu_percentage hunk ./src/allmydata/storage/crawler.py 83 - self.server = server - self.sharedir = server.sharedir - self.statefile = statefile self.prefixes = [si_b2a(struct.pack(">H", i << (16-10)))[:2] for i in range(2**10)] self.prefixes.sort() hunk ./src/allmydata/storage/crawler.py 87 self.timer = None - self.bucket_cache = (None, []) + self.shareset_cache = (None, []) self.current_sleep_time = None self.next_wake_time = None self.last_prefix_finished_time = None hunk ./src/allmydata/storage/crawler.py 150 left = len(self.prefixes) - self.last_complete_prefix_index remaining = left * self.last_prefix_elapsed_time # TODO: remainder of this prefix: we need to estimate the - # per-bucket time, probably by measuring the time spent on - # this prefix so far, divided by the number of buckets we've + # per-shareset time, probably by measuring the time spent on + # this prefix so far, divided by the number of sharesets we've # processed. d["estimated-cycle-complete-time-left"] = remaining # it's possible to call get_progress() from inside a crawler's hunk ./src/allmydata/storage/crawler.py 171 state dictionary. If we are not currently sleeping (i.e. 
get_state() was called from - inside the process_prefixdir, process_bucket, or finished_cycle() + inside the process_prefixdir, process_shareset, or finished_cycle() methods, or if startService has not yet been called on this crawler), these two keys will be None. hunk ./src/allmydata/storage/crawler.py 184 def load_state(self): # we use this to store state for both the crawler's internals and # anything the subclass-specific code needs. The state is stored - # after each bucket is processed, after each prefixdir is processed, + # after each shareset is processed, after each prefixdir is processed, # and after a cycle is complete. The internal keys we use are: # ["version"]: int, always 1 # ["last-cycle-finished"]: int, or None if we have not yet finished hunk ./src/allmydata/storage/crawler.py 198 # are sleeping between cycles, or if we # have not yet finished any prefixdir since # a cycle was started - # ["last-complete-bucket"]: str, base32 storage index bucket name - # of the last bucket to be processed, or - # None if we are sleeping between cycles + # ["last-complete-bucket"]: str, base32 storage index of the last + # shareset to be processed, or None if we + # are sleeping between cycles try: hunk ./src/allmydata/storage/crawler.py 202 - f = open(self.statefile, "rb") - state = pickle.load(f) - f.close() + state = pickle.loads(self.statefp.getContent()) except EnvironmentError: state = {"version": 1, "last-cycle-finished": None, hunk ./src/allmydata/storage/crawler.py 238 else: last_complete_prefix = self.prefixes[lcpi] self.state["last-complete-prefix"] = last_complete_prefix - tmpfile = self.statefile + ".tmp" - f = open(tmpfile, "wb") - pickle.dump(self.state, f) - f.close() - fileutil.move_into_place(tmpfile, self.statefile) + self.statefp.setContent(pickle.dumps(self.state)) def startService(self): # arrange things to look like we were just sleeping, so hunk ./src/allmydata/storage/crawler.py 280 sleep_time = (this_slice / self.allowed_cpu_percentage) - 
this_slice # if the math gets weird, or a timequake happens, don't sleep # forever. Note that this means that, while a cycle is running, we - # will process at least one bucket every 5 minutes, no matter how - # long that bucket takes. + # will process at least one shareset every 5 minutes, no matter how + # long that shareset takes. sleep_time = max(0.0, min(sleep_time, 299)) if finished_cycle: # how long should we sleep between cycles? Don't run faster than hunk ./src/allmydata/storage/crawler.py 311 for i in range(self.last_complete_prefix_index+1, len(self.prefixes)): # if we want to yield earlier, just raise TimeSliceExceeded() prefix = self.prefixes[i] - prefixdir = os.path.join(self.sharedir, prefix) - if i == self.bucket_cache[0]: - buckets = self.bucket_cache[1] + if i == self.shareset_cache[0]: + sharesets = self.shareset_cache[1] else: hunk ./src/allmydata/storage/crawler.py 314 - try: - buckets = os.listdir(prefixdir) - buckets.sort() - except EnvironmentError: - buckets = [] - self.bucket_cache = (i, buckets) - self.process_prefixdir(cycle, prefix, prefixdir, - buckets, start_slice) + sharesets = self.backend.get_sharesets_for_prefix(prefix) + self.shareset_cache = (i, sharesets) + self.process_prefixdir(cycle, prefix, sharesets, start_slice) self.last_complete_prefix_index = i now = time.time() hunk ./src/allmydata/storage/crawler.py 341 self.finished_cycle(cycle) self.save_state() - def process_prefixdir(self, cycle, prefix, prefixdir, buckets, start_slice): - """This gets a list of bucket names (i.e. storage index strings, + def process_prefixdir(self, cycle, prefix, sharesets, start_slice): + """ + This gets a list of shareset names (i.e. storage index strings, base32-encoded) in sorted order. You can override this if your crawler doesn't care about the actual hunk ./src/allmydata/storage/crawler.py 348 shares, for example a crawler which merely keeps track of how many - buckets are being managed by this server. 
+ sharesets are being managed by this server. hunk ./src/allmydata/storage/crawler.py 350 - Subclasses which *do* care about actual bucket should leave this - method along, and implement process_bucket() instead. + Subclasses which *do* care about actual shareset should leave this + method alone, and implement process_shareset() instead. """ hunk ./src/allmydata/storage/crawler.py 354 - for bucket in buckets: - if bucket <= self.state["last-complete-bucket"]: + for shareset in sharesets: + base32si = shareset.get_storage_index_string() + if base32si <= self.state["last-complete-bucket"]: continue hunk ./src/allmydata/storage/crawler.py 358 - self.process_bucket(cycle, prefix, prefixdir, bucket) - self.state["last-complete-bucket"] = bucket + self.process_shareset(cycle, prefix, shareset) + self.state["last-complete-bucket"] = base32si if time.time() >= start_slice + self.cpu_slice: raise TimeSliceExceeded() hunk ./src/allmydata/storage/crawler.py 366 # the remaining methods are explictly for subclasses to implement. def started_cycle(self, cycle): - """Notify a subclass that the crawler is about to start a cycle. + """ + Notify a subclass that the crawler is about to start a cycle. This method is for subclasses to override. No upcall is necessary. """ hunk ./src/allmydata/storage/crawler.py 373 pass - def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32): - """Examine a single bucket. Subclasses should do whatever they want + def process_shareset(self, cycle, prefix, shareset): + """ + Examine a single shareset. Subclasses should do whatever they want to do to the shares therein, then update self.state as necessary. If the crawler is never interrupted by SIGKILL, this method will be hunk ./src/allmydata/storage/crawler.py 379 - called exactly once per share (per cycle). If it *is* interrupted, + called exactly once per shareset (per cycle). 
If it *is* interrupted, then the next time the node is started, some amount of work will be duplicated, according to when self.save_state() was last called. By default, save_state() is called at the end of each timeslice, and hunk ./src/allmydata/storage/crawler.py 387 To reduce the chance of duplicate work (i.e. to avoid adding multiple records to a database), you can call save_state() at the end of your - process_bucket() method. This will reduce the maximum duplicated work - to one bucket per SIGKILL. It will also add overhead, probably 1-20ms - per bucket (and some disk writes), which will count against your - allowed_cpu_percentage, and which may be considerable if - process_bucket() runs quickly. + process_shareset() method. This will reduce the maximum duplicated + work to one shareset per SIGKILL. It will also add overhead, probably + 1-20ms per shareset (and some disk writes), which will count against + your allowed_cpu_percentage, and which may be considerable if + process_shareset() runs quickly. This method is for subclasses to override. No upcall is necessary. """ hunk ./src/allmydata/storage/crawler.py 398 pass def finished_prefix(self, cycle, prefix): - """Notify a subclass that the crawler has just finished processing a - prefix directory (all buckets with the same two-character/10bit + """ + Notify a subclass that the crawler has just finished processing a + prefix directory (all sharesets with the same two-character/10-bit prefix). To impose a limit on how much work might be duplicated by a SIGKILL that occurs during a timeslice, you can call self.save_state() here, but be aware that it may represent a hunk ./src/allmydata/storage/crawler.py 411 pass def finished_cycle(self, cycle): - """Notify subclass that a cycle (one complete traversal of all + """ + Notify subclass that a cycle (one complete traversal of all prefixdirs) has just finished. 'cycle' is the number of the cycle that just finished. 
This method should perform summary work and update self.state to publish information to status displays. hunk ./src/allmydata/storage/crawler.py 429 pass def yielding(self, sleep_time): - """The crawler is about to sleep for 'sleep_time' seconds. This + """ + The crawler is about to sleep for 'sleep_time' seconds. This method is mostly for the convenience of unit tests. This method is for subclasses to override. No upcall is necessary. hunk ./src/allmydata/storage/crawler.py 439 class BucketCountingCrawler(ShareCrawler): - """I keep track of how many buckets are being managed by this server. - This is equivalent to the number of distributed files and directories for - which I am providing storage. The actual number of files+directories in - the full grid is probably higher (especially when there are more servers - than 'N', the number of generated shares), because some files+directories - will have shares on other servers instead of me. Also note that the - number of buckets will differ from the number of shares in small grids, - when more than one share is placed on a single server. + """ + I keep track of how many sharesets, each corresponding to a storage index, + are being managed by this server. This is equivalent to the number of + distributed files and directories for which I am providing storage. The + actual number of files and directories in the full grid is probably higher + (especially when there are more servers than 'N', the number of generated + shares), because some files and directories will have shares on other + servers instead of me. Also note that the number of sharesets will differ + from the number of shares in small grids, when more than one share is + placed on a single server. 
""" minimum_cycle_time = 60*60 # we don't need this more than once an hour hunk ./src/allmydata/storage/crawler.py 453 - def __init__(self, server, statefile, num_sample_prefixes=1): - ShareCrawler.__init__(self, server, statefile) + def __init__(self, backend, statefp, num_sample_prefixes=1): + ShareCrawler.__init__(self, backend, statefp) self.num_sample_prefixes = num_sample_prefixes def add_initial_state(self): hunk ./src/allmydata/storage/crawler.py 467 self.state.setdefault("last-complete-bucket-count", None) self.state.setdefault("storage-index-samples", {}) - def process_prefixdir(self, cycle, prefix, prefixdir, buckets, start_slice): + def process_prefixdir(self, cycle, prefix, sharesets, start_slice): # we override process_prefixdir() because we don't want to look at hunk ./src/allmydata/storage/crawler.py 469 - # the individual buckets. We'll save state after each one. On my + # the individual sharesets. We'll save state after each one. On my # laptop, a mostly-empty storage server can process about 70 # prefixdirs in a 1.0s slice. if cycle not in self.state["bucket-counts"]: hunk ./src/allmydata/storage/crawler.py 474 self.state["bucket-counts"][cycle] = {} - self.state["bucket-counts"][cycle][prefix] = len(buckets) + self.state["bucket-counts"][cycle][prefix] = len(sharesets) if prefix in self.prefixes[:self.num_sample_prefixes]: hunk ./src/allmydata/storage/crawler.py 476 - self.state["storage-index-samples"][prefix] = (cycle, buckets) + self.state["storage-index-samples"][prefix] = (cycle, sharesets) def finished_cycle(self, cycle): last_counts = self.state["bucket-counts"].get(cycle, []) hunk ./src/allmydata/storage/crawler.py 482 if len(last_counts) == len(self.prefixes): # great, we have a whole cycle. 
- num_buckets = sum(last_counts.values()) - self.state["last-complete-bucket-count"] = num_buckets + num_sharesets = sum(last_counts.values()) + self.state["last-complete-bucket-count"] = num_sharesets # get rid of old counts for old_cycle in list(self.state["bucket-counts"].keys()): if old_cycle != cycle: hunk ./src/allmydata/storage/crawler.py 490 del self.state["bucket-counts"][old_cycle] # get rid of old samples too for prefix in list(self.state["storage-index-samples"].keys()): - old_cycle,buckets = self.state["storage-index-samples"][prefix] + old_cycle, storage_indices = self.state["storage-index-samples"][prefix] if old_cycle != cycle: del self.state["storage-index-samples"][prefix] hunk ./src/allmydata/storage/crawler.py 493 - hunk ./src/allmydata/storage/expirer.py 1 -import time, os, pickle, struct + +import time, pickle, struct +from twisted.python import log as twlog + from allmydata.storage.crawler import ShareCrawler hunk ./src/allmydata/storage/expirer.py 6 -from allmydata.storage.shares import get_share_file -from allmydata.storage.common import UnknownMutableContainerVersionError, \ +from allmydata.storage.common import si_b2a, UnknownMutableContainerVersionError, \ UnknownImmutableContainerVersionError hunk ./src/allmydata/storage/expirer.py 8 -from twisted.python import log as twlog + class LeaseCheckingCrawler(ShareCrawler): """I examine the leases on all shares, determining which are still valid hunk ./src/allmydata/storage/expirer.py 17 removed. 
I collect statistics on the leases and make these available to a web - status page, including:: + status page, including: Space recovered during this cycle-so-far: actual (only if expiration_enabled=True): hunk ./src/allmydata/storage/expirer.py 21 - num-buckets, num-shares, sum of share sizes, real disk usage + num-storage-indices, num-shares, sum of share sizes, real disk usage ('real disk usage' means we use stat(fn).st_blocks*512 and include any space used by the directory) what it would have been with the original lease expiration time hunk ./src/allmydata/storage/expirer.py 32 Space recovered during the last 10 cycles <-- saved in separate pickle - Shares/buckets examined: + Shares/storage-indices examined: this cycle-so-far prediction of rest of cycle during last 10 cycles <-- separate pickle hunk ./src/allmydata/storage/expirer.py 42 Histogram of leases-per-share: this-cycle-to-date last 10 cycles <-- separate pickle - Histogram of lease ages, buckets = 1day + Histogram of lease ages, in bins of 1 day cycle-to-date last 10 cycles <-- separate pickle hunk ./src/allmydata/storage/expirer.py 53 slow_start = 360 # wait 6 minutes after startup minimum_cycle_time = 12*60*60 # not more than twice per day - def __init__(self, server, statefile, historyfile, - expiration_enabled, mode, - override_lease_duration, # used if expiration_mode=="age" - cutoff_date, # used if expiration_mode=="cutoff-date" - sharetypes): - self.historyfile = historyfile - self.expiration_enabled = expiration_enabled - self.mode = mode + def __init__(self, backend, statefp, historyfp, expiration_policy): + ShareCrawler.__init__(self, backend, statefp) + self.historyfp = historyfp + self.expiration_enabled = expiration_policy['enabled'] + self.mode = expiration_policy['mode'] self.override_lease_duration = None self.cutoff_date = None if self.mode == "age": hunk ./src/allmydata/storage/expirer.py 61 - assert isinstance(override_lease_duration, (int, type(None))) -
self.override_lease_duration = override_lease_duration # seconds + assert isinstance(expiration_policy['override_lease_duration'], (int, type(None))) + self.override_lease_duration = expiration_policy['override_lease_duration'] # seconds elif self.mode == "cutoff-date": hunk ./src/allmydata/storage/expirer.py 64 - assert isinstance(cutoff_date, int) # seconds-since-epoch - assert cutoff_date is not None - self.cutoff_date = cutoff_date + assert isinstance(expiration_policy['cutoff_date'], int) # seconds-since-epoch + self.cutoff_date = expiration_policy['cutoff_date'] else: hunk ./src/allmydata/storage/expirer.py 67 - raise ValueError("GC mode '%s' must be 'age' or 'cutoff-date'" % mode) - self.sharetypes_to_expire = sharetypes - ShareCrawler.__init__(self, server, statefile) + raise ValueError("GC mode '%s' must be 'age' or 'cutoff-date'" % expiration_policy['mode']) + self.sharetypes_to_expire = expiration_policy['sharetypes'] def add_initial_state(self): # we fill ["cycle-to-date"] here (even though they will be reset in hunk ./src/allmydata/storage/expirer.py 82 self.state["cycle-to-date"].setdefault(k, so_far[k]) # initialize history - if not os.path.exists(self.historyfile): + if not self.historyfp.exists(): history = {} # cyclenum -> dict hunk ./src/allmydata/storage/expirer.py 84 - f = open(self.historyfile, "wb") - pickle.dump(history, f) - f.close() + self.historyfp.setContent(pickle.dumps(history)) def create_empty_cycle_dict(self): recovered = self.create_empty_recovered_dict() hunk ./src/allmydata/storage/expirer.py 97 def create_empty_recovered_dict(self): recovered = {} + # "buckets" is ambiguous; here it means the number of sharesets (one per storage index per server) for a in ("actual", "original", "configured", "examined"): for b in ("buckets", "shares", "sharebytes", "diskbytes"): recovered[a+"-"+b] = 0 hunk ./src/allmydata/storage/expirer.py 108 def started_cycle(self, cycle): self.state["cycle-to-date"] = self.create_empty_cycle_dict() - def 
stat(self, fn): - return os.stat(fn) - - def process_bucket(self, cycle, prefix, prefixdir, storage_index_b32): - bucketdir = os.path.join(prefixdir, storage_index_b32) - s = self.stat(bucketdir) + def process_storage_index(self, cycle, prefix, container): would_keep_shares = [] wks = None hunk ./src/allmydata/storage/expirer.py 111 + sharetype = None hunk ./src/allmydata/storage/expirer.py 113 - for fn in os.listdir(bucketdir): - try: - shnum = int(fn) - except ValueError: - continue # non-numeric means not a sharefile - sharefile = os.path.join(bucketdir, fn) + for share in container.get_shares(): + sharetype = share.sharetype try: hunk ./src/allmydata/storage/expirer.py 116 - wks = self.process_share(sharefile) + wks = self.process_share(share) except (UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError, struct.error): hunk ./src/allmydata/storage/expirer.py 120 - twlog.msg("lease-checker error processing %s" % sharefile) + twlog.msg("lease-checker error processing %r" % (share,)) twlog.err() hunk ./src/allmydata/storage/expirer.py 122 - which = (storage_index_b32, shnum) + which = (si_b2a(share.storageindex), share.get_shnum()) self.state["cycle-to-date"]["corrupt-shares"].append(which) wks = (1, 1, 1, "unknown") would_keep_shares.append(wks) hunk ./src/allmydata/storage/expirer.py 127 - sharetype = None + container_type = None if wks: hunk ./src/allmydata/storage/expirer.py 129 - # use the last share's sharetype as the buckettype - sharetype = wks[3] + # use the last share's sharetype as the container type + container_type = wks[3] rec = self.state["cycle-to-date"]["space-recovered"] self.increment(rec, "examined-buckets", 1) if sharetype: hunk ./src/allmydata/storage/expirer.py 134 - self.increment(rec, "examined-buckets-"+sharetype, 1) + self.increment(rec, "examined-buckets-"+container_type, 1) + + container_diskbytes = container.get_overhead() hunk ./src/allmydata/storage/expirer.py 138 - try: - bucket_diskbytes = s.st_blocks * 512 - 
except AttributeError: - bucket_diskbytes = 0 # no stat().st_blocks on windows if sum([wks[0] for wks in would_keep_shares]) == 0: hunk ./src/allmydata/storage/expirer.py 139 - self.increment_bucketspace("original", bucket_diskbytes, sharetype) + self.increment_container_space("original", container_diskbytes, sharetype) if sum([wks[1] for wks in would_keep_shares]) == 0: hunk ./src/allmydata/storage/expirer.py 141 - self.increment_bucketspace("configured", bucket_diskbytes, sharetype) + self.increment_container_space("configured", container_diskbytes, sharetype) if sum([wks[2] for wks in would_keep_shares]) == 0: hunk ./src/allmydata/storage/expirer.py 143 - self.increment_bucketspace("actual", bucket_diskbytes, sharetype) + self.increment_container_space("actual", container_diskbytes, sharetype) hunk ./src/allmydata/storage/expirer.py 145 - def process_share(self, sharefilename): - # first, find out what kind of a share it is - sf = get_share_file(sharefilename) - sharetype = sf.sharetype + def process_share(self, share): + sharetype = share.sharetype now = time.time() hunk ./src/allmydata/storage/expirer.py 148 - s = self.stat(sharefilename) + sharebytes = share.get_size() + diskbytes = share.get_used_space() num_leases = 0 num_valid_leases_original = 0 hunk ./src/allmydata/storage/expirer.py 156 num_valid_leases_configured = 0 expired_leases_configured = [] - for li in sf.get_leases(): + for li in share.get_leases(): num_leases += 1 original_expiration_time = li.get_expiration_time() grant_renew_time = li.get_grant_renew_time_time() hunk ./src/allmydata/storage/expirer.py 169 # expired-or-not according to our configured age limit expired = False - if self.mode == "age": - age_limit = original_expiration_time - if self.override_lease_duration is not None: - age_limit = self.override_lease_duration - if age > age_limit: - expired = True - else: - assert self.mode == "cutoff-date" - if grant_renew_time < self.cutoff_date: - expired = True - if sharetype not in 
self.sharetypes_to_expire: - expired = False + if sharetype in self.sharetypes_to_expire: + if self.mode == "age": + age_limit = original_expiration_time + if self.override_lease_duration is not None: + age_limit = self.override_lease_duration + if age > age_limit: + expired = True + else: + assert self.mode == "cutoff-date" + if grant_renew_time < self.cutoff_date: + expired = True if expired: expired_leases_configured.append(li) hunk ./src/allmydata/storage/expirer.py 188 so_far = self.state["cycle-to-date"] self.increment(so_far["leases-per-share-histogram"], num_leases, 1) - self.increment_space("examined", s, sharetype) + self.increment_space("examined", diskbytes, sharetype) would_keep_share = [1, 1, 1, sharetype] hunk ./src/allmydata/storage/expirer.py 194 if self.expiration_enabled: for li in expired_leases_configured: - sf.cancel_lease(li.cancel_secret) + share.cancel_lease(li.cancel_secret) if num_valid_leases_original == 0: would_keep_share[0] = 0 hunk ./src/allmydata/storage/expirer.py 198 - self.increment_space("original", s, sharetype) + self.increment_space("original", sharebytes, diskbytes, sharetype) if num_valid_leases_configured == 0: would_keep_share[1] = 0 hunk ./src/allmydata/storage/expirer.py 202 - self.increment_space("configured", s, sharetype) + self.increment_space("configured", sharebytes, diskbytes, sharetype) if self.expiration_enabled: would_keep_share[2] = 0 hunk ./src/allmydata/storage/expirer.py 205 - self.increment_space("actual", s, sharetype) + self.increment_space("actual", sharebytes, diskbytes, sharetype) return would_keep_share hunk ./src/allmydata/storage/expirer.py 209 - def increment_space(self, a, s, sharetype): - sharebytes = s.st_size - try: - # note that stat(2) says that st_blocks is 512 bytes, and that - # st_blksize is "optimal file sys I/O ops blocksize", which is - # independent of the block-size that st_blocks uses. 
- diskbytes = s.st_blocks * 512 - except AttributeError: - # the docs say that st_blocks is only on linux. I also see it on - # MacOS. But it isn't available on windows. - diskbytes = sharebytes + def increment_space(self, a, sharebytes, diskbytes, sharetype): so_far_sr = self.state["cycle-to-date"]["space-recovered"] self.increment(so_far_sr, a+"-shares", 1) self.increment(so_far_sr, a+"-sharebytes", sharebytes) hunk ./src/allmydata/storage/expirer.py 219 self.increment(so_far_sr, a+"-sharebytes-"+sharetype, sharebytes) self.increment(so_far_sr, a+"-diskbytes-"+sharetype, diskbytes) - def increment_bucketspace(self, a, bucket_diskbytes, sharetype): + def increment_container_space(self, a, container_diskbytes, container_type): rec = self.state["cycle-to-date"]["space-recovered"] hunk ./src/allmydata/storage/expirer.py 221 - self.increment(rec, a+"-diskbytes", bucket_diskbytes) + self.increment(rec, a+"-diskbytes", container_diskbytes) self.increment(rec, a+"-buckets", 1) hunk ./src/allmydata/storage/expirer.py 223 - if sharetype: - self.increment(rec, a+"-diskbytes-"+sharetype, bucket_diskbytes) - self.increment(rec, a+"-buckets-"+sharetype, 1) + if container_type: + self.increment(rec, a+"-diskbytes-"+container_type, container_diskbytes) + self.increment(rec, a+"-buckets-"+container_type, 1) def increment(self, d, k, delta=1): if k not in d: hunk ./src/allmydata/storage/expirer.py 279 # copy() needs to become a deepcopy h["space-recovered"] = s["space-recovered"].copy() - history = pickle.load(open(self.historyfile, "rb")) + history = pickle.load(self.historyfp.getContent()) history[cycle] = h while len(history) > 10: oldcycles = sorted(history.keys()) hunk ./src/allmydata/storage/expirer.py 284 del history[oldcycles[0]] - f = open(self.historyfile, "wb") - pickle.dump(history, f) - f.close() + self.historyfp.setContent(pickle.dumps(history)) def get_state(self): """In addition to the crawler state described in hunk ./src/allmydata/storage/expirer.py 353 progress 
= self.get_progress() state = ShareCrawler.get_state(self) # does a shallow copy - history = pickle.load(open(self.historyfile, "rb")) + history = pickle.load(self.historyfp.getContent()) state["history"] = history if not progress["cycle-in-progress"]: hunk ./src/allmydata/storage/lease.py 17 def get_expiration_time(self): return self.expiration_time + def get_grant_renew_time_time(self): # hack, based upon fixed 31day expiration period return self.expiration_time - 31*24*60*60 hunk ./src/allmydata/storage/lease.py 21 + def get_age(self): return time.time() - self.get_grant_renew_time_time() hunk ./src/allmydata/storage/lease.py 32 self.expiration_time) = struct.unpack(">L32s32sL", data) self.nodeid = None return self + def to_immutable_data(self): return struct.pack(">L32s32sL", self.owner_num, hunk ./src/allmydata/storage/lease.py 45 int(self.expiration_time), self.renew_secret, self.cancel_secret, self.nodeid) + def from_mutable_data(self, data): (self.owner_num, self.expiration_time, hunk ./src/allmydata/storage/server.py 1 -import os, re, weakref, struct, time +import os, weakref, time from foolscap.api import Referenceable from twisted.application import service hunk ./src/allmydata/storage/server.py 11 from allmydata.util import fileutil, idlib, log, time_format import allmydata # for __full_version__ -from allmydata.storage.common import si_b2a, si_a2b, storage_index_to_dir -_pyflakes_hush = [si_b2a, si_a2b, storage_index_to_dir] # re-exported +from allmydata.storage.common import si_b2a from allmydata.storage.lease import LeaseInfo hunk ./src/allmydata/storage/server.py 13 -from allmydata.storage.mutable import MutableShareFile, EmptyShare, \ - create_mutable_sharefile -from allmydata.storage.immutable import ShareFile, BucketWriter, BucketReader -from allmydata.storage.crawler import BucketCountingCrawler from allmydata.storage.expirer import LeaseCheckingCrawler hunk ./src/allmydata/storage/server.py 14 - -# storage/ -# storage/shares/incoming -# 
incoming/ holds temp dirs named $START/$STORAGEINDEX/$SHARENUM which will -# be moved to storage/shares/$START/$STORAGEINDEX/$SHARENUM upon success -# storage/shares/$START/$STORAGEINDEX -# storage/shares/$START/$STORAGEINDEX/$SHARENUM - -# Where "$START" denotes the first 10 bits worth of $STORAGEINDEX (that's 2 -# base-32 chars). - -# $SHARENUM matches this regex: -NUM_RE=re.compile("^[0-9]+$") - +from allmydata.storage.crawler import BucketCountingCrawler class StorageServer(service.MultiService, Referenceable): hunk ./src/allmydata/storage/server.py 19 implements(RIStorageServer, IStatsProducer) + name = 'storage' LeaseCheckerClass = LeaseCheckingCrawler hunk ./src/allmydata/storage/server.py 22 + DEFAULT_EXPIRATION_POLICY = { + 'enabled': False, + 'mode': 'age', + 'override_lease_duration': None, + 'cutoff_date': None, + 'sharetypes': ('mutable', 'immutable'), + } hunk ./src/allmydata/storage/server.py 30 - def __init__(self, storedir, nodeid, reserved_space=0, - discard_storage=False, readonly_storage=False, + def __init__(self, nodeid, backend, reserved_space=0, + readonly_storage=False, stats_provider=None, hunk ./src/allmydata/storage/server.py 33 - expiration_enabled=False, - expiration_mode="age", - expiration_override_lease_duration=None, - expiration_cutoff_date=None, - expiration_sharetypes=("mutable", "immutable")): + expiration_policy=None): service.MultiService.__init__(self) assert isinstance(nodeid, str) assert len(nodeid) == 20 hunk ./src/allmydata/storage/server.py 38 self.my_nodeid = nodeid - self.storedir = storedir - sharedir = os.path.join(storedir, "shares") - fileutil.make_dirs(sharedir) - self.sharedir = sharedir - # we don't actually create the corruption-advisory dir until necessary - self.corruption_advisory_dir = os.path.join(storedir, - "corruption-advisories") - self.reserved_space = int(reserved_space) - self.no_storage = discard_storage - self.readonly_storage = readonly_storage self.stats_provider = stats_provider if 
self.stats_provider: self.stats_provider.register_producer(self) hunk ./src/allmydata/storage/server.py 41 - self.incomingdir = os.path.join(sharedir, 'incoming') - self._clean_incomplete() - fileutil.make_dirs(self.incomingdir) self._active_writers = weakref.WeakKeyDictionary() hunk ./src/allmydata/storage/server.py 42 + self.backend = backend + self.backend.setServiceParent(self) + self.backend.set_storage_server(self) log.msg("StorageServer created", facility="tahoe.storage") hunk ./src/allmydata/storage/server.py 47 - if reserved_space: - if self.get_available_space() is None: - log.msg("warning: [storage]reserved_space= is set, but this platform does not support an API to get disk statistics (statvfs(2) or GetDiskFreeSpaceEx), so this reservation cannot be honored", - umin="0wZ27w", level=log.UNUSUAL) - self.latencies = {"allocate": [], # immutable "write": [], "close": [], hunk ./src/allmydata/storage/server.py 58 "renew": [], "cancel": [], } - self.add_bucket_counter() - - statefile = os.path.join(self.storedir, "lease_checker.state") - historyfile = os.path.join(self.storedir, "lease_checker.history") - klass = self.LeaseCheckerClass - self.lease_checker = klass(self, statefile, historyfile, - expiration_enabled, expiration_mode, - expiration_override_lease_duration, - expiration_cutoff_date, - expiration_sharetypes) - self.lease_checker.setServiceParent(self) + self._setup_bucket_counter() + self._setup_lease_checker(expiration_policy or self.DEFAULT_EXPIRATION_POLICY) def __repr__(self): return "" % (idlib.shortnodeid_b2a(self.my_nodeid),) hunk ./src/allmydata/storage/server.py 64 - def add_bucket_counter(self): - statefile = os.path.join(self.storedir, "bucket_counter.state") - self.bucket_counter = BucketCountingCrawler(self, statefile) + def _setup_bucket_counter(self): + statefp = self.storedir.child("bucket_counter.state") + self.bucket_counter = BucketCountingCrawler(statefp) self.bucket_counter.setServiceParent(self) hunk 
./src/allmydata/storage/server.py 69 + def _setup_lease_checker(self, expiration_policy): + statefp = self.storedir.child("lease_checker.state") + historyfp = self.storedir.child("lease_checker.history") + self.lease_checker = self.LeaseCheckerClass(statefp, historyfp, expiration_policy) + self.lease_checker.setServiceParent(self) + def count(self, name, delta=1): if self.stats_provider: self.stats_provider.count("storage_server." + name, delta) hunk ./src/allmydata/storage/server.py 89 """Return a dict, indexed by category, that contains a dict of latency numbers for each category. If there are sufficient samples for unambiguous interpretation, each dict will contain the - following keys: mean, 01_0_percentile, 10_0_percentile, + following keys: samplesize, mean, 01_0_percentile, 10_0_percentile, 50_0_percentile (median), 90_0_percentile, 95_0_percentile, 99_0_percentile, 99_9_percentile. If there are insufficient samples for a given percentile to be interpreted unambiguously hunk ./src/allmydata/storage/server.py 111 else: stats["mean"] = None - orderstatlist = [(0.01, "01_0_percentile", 100), (0.1, "10_0_percentile", 10),\ - (0.50, "50_0_percentile", 10), (0.90, "90_0_percentile", 10),\ - (0.95, "95_0_percentile", 20), (0.99, "99_0_percentile", 100),\ + orderstatlist = [(0.1, "10_0_percentile", 10), (0.5, "50_0_percentile", 10), \ + (0.9, "90_0_percentile", 10), (0.95, "95_0_percentile", 20), \ + (0.01, "01_0_percentile", 100), (0.99, "99_0_percentile", 100),\ (0.999, "99_9_percentile", 1000)] for percentile, percentilestring, minnumtoobserve in orderstatlist: hunk ./src/allmydata/storage/server.py 130 kwargs["facility"] = "tahoe.storage" return log.msg(*args, **kwargs) - def _clean_incomplete(self): - fileutil.rm_dir(self.incomingdir) + def get_nodeid(self): + return self.my_nodeid def get_stats(self): # remember: RIStatsProvider requires that our return dict hunk ./src/allmydata/storage/server.py 135 - # contains numeric values. 
+ # contains numeric, or None values. stats = { 'storage_server.allocated': self.allocated_size(), } stats['storage_server.reserved_space'] = self.reserved_space for category,ld in self.get_latencies().items(): hunk ./src/allmydata/storage/server.py 142 for name,v in ld.items(): stats['storage_server.latencies.%s.%s' % (category, name)] = v - try: - disk = fileutil.get_disk_stats(self.sharedir, self.reserved_space) - writeable = disk['avail'] > 0 + self.backend.fill_in_space_stats(stats) hunk ./src/allmydata/storage/server.py 144 - # spacetime predictors should use disk_avail / (d(disk_used)/dt) - stats['storage_server.disk_total'] = disk['total'] - stats['storage_server.disk_used'] = disk['used'] - stats['storage_server.disk_free_for_root'] = disk['free_for_root'] - stats['storage_server.disk_free_for_nonroot'] = disk['free_for_nonroot'] - stats['storage_server.disk_avail'] = disk['avail'] - except AttributeError: - writeable = True - except EnvironmentError: - log.msg("OS call to get disk statistics failed", level=log.UNUSUAL) - writeable = False - - if self.readonly_storage: - stats['storage_server.disk_avail'] = 0 - writeable = False - - stats['storage_server.accepting_immutable_shares'] = int(writeable) s = self.bucket_counter.get_state() bucket_count = s.get("last-complete-bucket-count") if bucket_count: hunk ./src/allmydata/storage/server.py 151 return stats def get_available_space(self): - """Returns available space for share storage in bytes, or None if no - API to get this information is available.""" - - if self.readonly_storage: - return 0 - return fileutil.get_available_space(self.sharedir, self.reserved_space) + return self.backend.get_available_space() def allocated_size(self): space = 0 hunk ./src/allmydata/storage/server.py 160 return space def remote_get_version(self): - remaining_space = self.get_available_space() + remaining_space = self.backend.get_available_space() if remaining_space is None: # We're on a platform that has no API to get disk 
stats. remaining_space = 2**64 hunk ./src/allmydata/storage/server.py 176 } return version - def remote_allocate_buckets(self, storage_index, + def remote_allocate_buckets(self, storageindex, renew_secret, cancel_secret, sharenums, allocated_size, canary, owner_num=0): hunk ./src/allmydata/storage/server.py 180 + # cancel_secret is no longer used. # owner_num is not for clients to set, but rather it should be hunk ./src/allmydata/storage/server.py 182 - # curried into the PersonalStorageServer instance that is dedicated - # to a particular owner. + # curried into a StorageServer instance dedicated to a particular + # owner. start = time.time() self.count("allocate") hunk ./src/allmydata/storage/server.py 186 - alreadygot = set() + incoming = set() bucketwriters = {} # k: shnum, v: BucketWriter hunk ./src/allmydata/storage/server.py 188 - si_dir = storage_index_to_dir(storage_index) - si_s = si_b2a(storage_index) hunk ./src/allmydata/storage/server.py 189 + si_s = si_b2a(storageindex) log.msg("storage: allocate_buckets %s" % si_s) hunk ./src/allmydata/storage/server.py 192 - # in this implementation, the lease information (including secrets) - # goes into the share files themselves. It could also be put into a - # separate database. Note that the lease should not be added until - # the BucketWriter has been closed. + # Note that the lease should not be added until the BucketWriter + # has been closed. 
expire_time = time.time() + 31*24*60*60 hunk ./src/allmydata/storage/server.py 195 - lease_info = LeaseInfo(owner_num, - renew_secret, cancel_secret, + lease_info = LeaseInfo(owner_num, renew_secret, expire_time, self.my_nodeid) max_space_per_bucket = allocated_size hunk ./src/allmydata/storage/server.py 200 - remaining_space = self.get_available_space() + remaining_space = self.backend.get_available_space() limited = remaining_space is not None if limited: hunk ./src/allmydata/storage/server.py 203 - # this is a bit conservative, since some of this allocated_size() - # has already been written to disk, where it will show up in + # This is a bit conservative, since some of this allocated_size() + # has already been written to the backend, where it will show up in # get_available_space. remaining_space -= self.allocated_size() # self.readonly_storage causes remaining_space <= 0 hunk ./src/allmydata/storage/server.py 209 - # fill alreadygot with all shares that we have, not just the ones + # Fill alreadygot with all shares that we have, not just the ones # they asked about: this will save them a lot of work. Add or update # leases for all of them: if they want us to hold shares for this hunk ./src/allmydata/storage/server.py 212 - # file, they'll want us to hold leases for this file. - for (shnum, fn) in self._get_bucket_shares(storage_index): - alreadygot.add(shnum) - sf = ShareFile(fn) - sf.add_or_renew_lease(lease_info) + # file, they'll want us to hold leases for all the shares of it. + # + # XXX should we be making the assumption here that lease info is + # duplicated in all shares? 
+ alreadygot = set() + for share in self.backend.get_shares(storageindex): + share.add_or_renew_lease(lease_info) + alreadygot.add(share.shnum) hunk ./src/allmydata/storage/server.py 221 - for shnum in sharenums: - incominghome = os.path.join(self.incomingdir, si_dir, "%d" % shnum) - finalhome = os.path.join(self.sharedir, si_dir, "%d" % shnum) - if os.path.exists(finalhome): - # great! we already have it. easy. - pass - elif os.path.exists(incominghome): - # Note that we don't create BucketWriters for shnums that - # have a partial share (in incoming/), so if a second upload - # occurs while the first is still in progress, the second - # uploader will use different storage servers. - pass - elif (not limited) or (remaining_space >= max_space_per_bucket): - # ok! we need to create the new share file. - bw = BucketWriter(self, incominghome, finalhome, - max_space_per_bucket, lease_info, canary) - if self.no_storage: - bw.throw_out_all_data = True + # all share numbers that are incoming + incoming = self.backend.get_incoming_shnums(storageindex) + + for shnum in ((sharenums - alreadygot) - incoming): + if (not limited) or (remaining_space >= max_space_per_bucket): + bw = self.backend.make_bucket_writer(storageindex, shnum, max_space_per_bucket, + lease_info, canary) bucketwriters[shnum] = bw self._active_writers[bw] = 1 if limited: hunk ./src/allmydata/storage/server.py 233 remaining_space -= max_space_per_bucket else: - # bummer! not enough space to accept this bucket + # Bummer not enough space to accept this share. 
pass hunk ./src/allmydata/storage/server.py 236 - if bucketwriters: - fileutil.make_dirs(os.path.join(self.sharedir, si_dir)) - self.add_latency("allocate", time.time() - start) return alreadygot, bucketwriters hunk ./src/allmydata/storage/server.py 239 - def _iter_share_files(self, storage_index): - for shnum, filename in self._get_bucket_shares(storage_index): - f = open(filename, 'rb') - header = f.read(32) - f.close() - if header[:32] == MutableShareFile.MAGIC: - sf = MutableShareFile(filename, self) - # note: if the share has been migrated, the renew_lease() - # call will throw an exception, with information to help the - # client update the lease. - elif header[:4] == struct.pack(">L", 1): - sf = ShareFile(filename) - else: - continue # non-sharefile - yield sf - - def remote_add_lease(self, storage_index, renew_secret, cancel_secret, + def remote_add_lease(self, storageindex, renew_secret, cancel_secret, owner_num=1): hunk ./src/allmydata/storage/server.py 241 + # cancel_secret is no longer used. 
start = time.time() self.count("add-lease") new_expire_time = time.time() + 31*24*60*60 hunk ./src/allmydata/storage/server.py 245 - lease_info = LeaseInfo(owner_num, - renew_secret, cancel_secret, + lease_info = LeaseInfo(owner_num, renew_secret, new_expire_time, self.my_nodeid) hunk ./src/allmydata/storage/server.py 247 - for sf in self._iter_share_files(storage_index): - sf.add_or_renew_lease(lease_info) - self.add_latency("add-lease", time.time() - start) - return None hunk ./src/allmydata/storage/server.py 248 - def remote_renew_lease(self, storage_index, renew_secret): + try: + self.backend.add_or_renew_lease(lease_info) + finally: + self.add_latency("add-lease", time.time() - start) + + def remote_renew_lease(self, storageindex, renew_secret): start = time.time() self.count("renew") hunk ./src/allmydata/storage/server.py 256 - new_expire_time = time.time() + 31*24*60*60 - found_buckets = False - for sf in self._iter_share_files(storage_index): - found_buckets = True - sf.renew_lease(renew_secret, new_expire_time) - self.add_latency("renew", time.time() - start) - if not found_buckets: - raise IndexError("no such lease to renew") + + try: + shareset = self.backend.get_shareset(storageindex) + new_expiration_time = start + 31*24*60*60 # one month from now + shareset.renew_lease(renew_secret, new_expiration_time) + finally: + self.add_latency("renew", time.time() - start) def bucket_writer_closed(self, bw, consumed_size): if self.stats_provider: hunk ./src/allmydata/storage/server.py 269 self.stats_provider.count('storage_server.bytes_added', consumed_size) del self._active_writers[bw] - def _get_bucket_shares(self, storage_index): - """Return a list of (shnum, pathname) tuples for files that hold - shares for this storage_index. 
In each tuple, 'shnum' will always be - the integer form of the last component of 'pathname'.""" - storagedir = os.path.join(self.sharedir, storage_index_to_dir(storage_index)) - try: - for f in os.listdir(storagedir): - if NUM_RE.match(f): - filename = os.path.join(storagedir, f) - yield (int(f), filename) - except OSError: - # Commonly caused by there being no buckets at all. - pass - - def remote_get_buckets(self, storage_index): + def remote_get_buckets(self, storageindex): start = time.time() self.count("get") hunk ./src/allmydata/storage/server.py 272 - si_s = si_b2a(storage_index) + si_s = si_b2a(storageindex) log.msg("storage: get_buckets %s" % si_s) bucketreaders = {} # k: sharenum, v: BucketReader hunk ./src/allmydata/storage/server.py 275 - for shnum, filename in self._get_bucket_shares(storage_index): - bucketreaders[shnum] = BucketReader(self, filename, - storage_index, shnum) - self.add_latency("get", time.time() - start) - return bucketreaders hunk ./src/allmydata/storage/server.py 276 - def get_leases(self, storage_index): - """Provide an iterator that yields all of the leases attached to this - bucket. Each lease is returned as a LeaseInfo instance. + try: + shareset = self.backend.get_shareset(storageindex) + for share in shareset.get_shares(storageindex): + bucketreaders[share.get_shnum()] = self.backend.make_bucket_reader(self, share) + return bucketreaders + finally: + self.add_latency("get", time.time() - start) hunk ./src/allmydata/storage/server.py 284 - This method is not for client use. + def get_leases(self, storageindex): """ hunk ./src/allmydata/storage/server.py 286 + Provide an iterator that yields all of the leases attached to this + bucket. Each lease is returned as a LeaseInfo instance. 
hunk ./src/allmydata/storage/server.py 289 - # since all shares get the same lease data, we just grab the leases - # from the first share - try: - shnum, filename = self._get_bucket_shares(storage_index).next() - sf = ShareFile(filename) - return sf.get_leases() - except StopIteration: - return iter([]) + This method is not for client use. XXX do we need it at all? + """ + return self.backend.get_shareset(storageindex).get_leases() hunk ./src/allmydata/storage/server.py 293 - def remote_slot_testv_and_readv_and_writev(self, storage_index, + def remote_slot_testv_and_readv_and_writev(self, storageindex, secrets, test_and_write_vectors, read_vector): hunk ./src/allmydata/storage/server.py 299 start = time.time() self.count("writev") - si_s = si_b2a(storage_index) + si_s = si_b2a(storageindex) log.msg("storage: slot_writev %s" % si_s) hunk ./src/allmydata/storage/server.py 301 - si_dir = storage_index_to_dir(storage_index) - (write_enabler, renew_secret, cancel_secret) = secrets - # shares exist if there is a file for them - bucketdir = os.path.join(self.sharedir, si_dir) - shares = {} - if os.path.isdir(bucketdir): - for sharenum_s in os.listdir(bucketdir): - try: - sharenum = int(sharenum_s) - except ValueError: - continue - filename = os.path.join(bucketdir, sharenum_s) - msf = MutableShareFile(filename, self) - msf.check_write_enabler(write_enabler, si_s) - shares[sharenum] = msf - # write_enabler is good for all existing shares. - - # Now evaluate test vectors. - testv_is_good = True - for sharenum in test_and_write_vectors: - (testv, datav, new_length) = test_and_write_vectors[sharenum] - if sharenum in shares: - if not shares[sharenum].check_testv(testv): - self.log("testv failed: [%d]: %r" % (sharenum, testv)) - testv_is_good = False - break - else: - # compare the vectors against an empty share, in which all - # reads return empty strings. 
- if not EmptyShare().check_testv(testv): - self.log("testv failed (empty): [%d] %r" % (sharenum, - testv)) - testv_is_good = False - break hunk ./src/allmydata/storage/server.py 302 - # now gather the read vectors, before we do any writes - read_data = {} - for sharenum, share in shares.items(): - read_data[sharenum] = share.readv(read_vector) - - ownerid = 1 # TODO - expire_time = time.time() + 31*24*60*60 # one month - lease_info = LeaseInfo(ownerid, - renew_secret, cancel_secret, - expire_time, self.my_nodeid) - - if testv_is_good: - # now apply the write vectors - for sharenum in test_and_write_vectors: - (testv, datav, new_length) = test_and_write_vectors[sharenum] - if new_length == 0: - if sharenum in shares: - shares[sharenum].unlink() - else: - if sharenum not in shares: - # allocate a new share - allocated_size = 2000 # arbitrary, really - share = self._allocate_slot_share(bucketdir, secrets, - sharenum, - allocated_size, - owner_num=0) - shares[sharenum] = share - shares[sharenum].writev(datav, new_length) - # and update the lease - shares[sharenum].add_or_renew_lease(lease_info) - - if new_length == 0: - # delete empty bucket directories - if not os.listdir(bucketdir): - os.rmdir(bucketdir) - - - # all done - self.add_latency("writev", time.time() - start) - return (testv_is_good, read_data) - - def _allocate_slot_share(self, bucketdir, secrets, sharenum, - allocated_size, owner_num=0): - (write_enabler, renew_secret, cancel_secret) = secrets - my_nodeid = self.my_nodeid - fileutil.make_dirs(bucketdir) - filename = os.path.join(bucketdir, "%d" % sharenum) - share = create_mutable_sharefile(filename, my_nodeid, write_enabler, - self) - return share + try: + shareset = self.backend.get_shareset(storageindex) + expiration_time = start + 31*24*60*60 # one month from now + return shareset.testv_and_readv_and_writev(self, secrets, test_and_write_vectors, + read_vector, expiration_time) + finally: + self.add_latency("writev", time.time() - start) hunk 
./src/allmydata/storage/server.py 310 - def remote_slot_readv(self, storage_index, shares, readv): + def remote_slot_readv(self, storageindex, shares, readv): start = time.time() self.count("readv") hunk ./src/allmydata/storage/server.py 313 - si_s = si_b2a(storage_index) - lp = log.msg("storage: slot_readv %s %s" % (si_s, shares), - facility="tahoe.storage", level=log.OPERATIONAL) - si_dir = storage_index_to_dir(storage_index) - # shares exist if there is a file for them - bucketdir = os.path.join(self.sharedir, si_dir) - if not os.path.isdir(bucketdir): + si_s = si_b2a(storageindex) + log.msg("storage: slot_readv %s %s" % (si_s, shares), + facility="tahoe.storage", level=log.OPERATIONAL) + + try: + shareset = self.backend.get_shareset(storageindex) + return shareset.readv(self, shares, readv) + finally: self.add_latency("readv", time.time() - start) hunk ./src/allmydata/storage/server.py 322 - return {} - datavs = {} - for sharenum_s in os.listdir(bucketdir): - try: - sharenum = int(sharenum_s) - except ValueError: - continue - if sharenum in shares or not shares: - filename = os.path.join(bucketdir, sharenum_s) - msf = MutableShareFile(filename, self) - datavs[sharenum] = msf.readv(readv) - log.msg("returning shares %s" % (datavs.keys(),), - facility="tahoe.storage", level=log.NOISY, parent=lp) - self.add_latency("readv", time.time() - start) - return datavs hunk ./src/allmydata/storage/server.py 323 - def remote_advise_corrupt_share(self, share_type, storage_index, shnum, + def remote_advise_corrupt_share(self, share_type, storageindex, shnum, reason): fileutil.make_dirs(self.corruption_advisory_dir) now = time_format.iso_utc(sep="T") hunk ./src/allmydata/storage/server.py 327 - si_s = si_b2a(storage_index) + si_s = si_b2a(storageindex) # windows can't handle colons in the filename fn = os.path.join(self.corruption_advisory_dir, "%s--%s-%d" % (now, si_s, shnum)).replace(":","") hunk ./src/allmydata/storage/server.py 334 f = open(fn, "w") f.write("report: Share 
Corruption\n") f.write("type: %s\n" % share_type) - f.write("storage_index: %s\n" % si_s) + f.write("storageindex: %s\n" % si_s) f.write("share_number: %d\n" % shnum) f.write("\n") f.write(reason) addfile ./src/allmydata/test/test_backends.py hunk ./src/allmydata/test/test_backends.py 1 +import os, stat +from twisted.trial import unittest +from allmydata.util.log import msg +from allmydata.test.common_util import ReallyEqualMixin +import mock +# This is the code that we're going to be testing. +from allmydata.storage.server import StorageServer +from allmydata.storage.backends.disk.core import DiskBackend +from allmydata.storage.backends.null.core import NullBackend +from allmydata.storage.common import si_si2dir +# The following share file content was generated with +# storage.immutable.ShareFile from Tahoe-LAFS v1.8.2 +# with share data == 'a'. The total size of this input +# is 85 bytes. +shareversionnumber = '\x00\x00\x00\x01' +sharedatalength = '\x00\x00\x00\x01' +numberofleases = '\x00\x00\x00\x01' +shareinputdata = 'a' +ownernumber = '\x00\x00\x00\x00' +renewsecret = 'x'*32 +cancelsecret = 'y'*32 +expirationtime = '\x00(\xde\x80' +nextlease = '' +containerdata = shareversionnumber + sharedatalength + numberofleases +client_data = shareinputdata + ownernumber + renewsecret + \ + cancelsecret + expirationtime + nextlease +share_data = containerdata + client_data +testnodeid = 'testnodeidxxxxxxxxxx' + + +class MockFileSystem(unittest.TestCase): + """ I simulate a filesystem that the code under test can use. I simulate + just the parts of the filesystem that the current implementation of Disk + backend needs. """ + def setUp(self): + # Make patcher, patch, and effects for disk-using functions. + msg( "%s.setUp()" % (self,)) + self.mockedfilepaths = {} + # keys are pathnames, values are MockFilePath objects. This is necessary because + # MockFilePath behavior sometimes depends on the filesystem. 
Where it does, + # self.mockedfilepaths has the relevant information. + self.storedir = MockFilePath('teststoredir', self.mockedfilepaths) + self.basedir = self.storedir.child('shares') + self.baseincdir = self.basedir.child('incoming') + self.sharedirfinalname = self.basedir.child('or').child('orsxg5dtorxxeylhmvpws3temv4a') + self.sharedirincomingname = self.baseincdir.child('or').child('orsxg5dtorxxeylhmvpws3temv4a') + self.shareincomingname = self.sharedirincomingname.child('0') + self.sharefinalname = self.sharedirfinalname.child('0') + + self.FilePathFake = mock.patch('allmydata.storage.backends.disk.core.FilePath', new = MockFilePath) + self.FilePathFake.__enter__() + + self.BCountingCrawler = mock.patch('allmydata.storage.backends.disk.core.BucketCountingCrawler') + FakeBCC = self.BCountingCrawler.__enter__() + FakeBCC.side_effect = self.call_FakeBCC + + self.LeaseCheckingCrawler = mock.patch('allmydata.storage.backends.disk.core.LeaseCheckingCrawler') + FakeLCC = self.LeaseCheckingCrawler.__enter__() + FakeLCC.side_effect = self.call_FakeLCC + + self.get_available_space = mock.patch('allmydata.util.fileutil.get_available_space') + GetSpace = self.get_available_space.__enter__() + GetSpace.side_effect = self.call_get_available_space + + self.statforsize = mock.patch('allmydata.storage.backends.disk.core.filepath.stat') + getsize = self.statforsize.__enter__() + getsize.side_effect = self.call_statforsize + + def call_FakeBCC(self, StateFile): + return MockBCC() + + def call_FakeLCC(self, StateFile, HistoryFile, ExpirationPolicy): + return MockLCC() + + def call_get_available_space(self, storedir, reservedspace): + # The input vector has an input size of 85. 
+ return 85 - reservedspace + + def call_statforsize(self, fakefpname): + return self.mockedfilepaths[fakefpname].fileobject.size() + + def tearDown(self): + msg( "%s.tearDown()" % (self,)) + self.FilePathFake.__exit__() + self.mockedfilepaths = {} + + +class MockFilePath: + def __init__(self, pathstring, ffpathsenvironment, existence=False): + # I can't just make the values MockFileObjects because they may be directories. + self.mockedfilepaths = ffpathsenvironment + self.path = pathstring + self.existence = existence + if not self.mockedfilepaths.has_key(self.path): + # The first MockFilePath object is special + self.mockedfilepaths[self.path] = self + self.fileobject = None + else: + self.fileobject = self.mockedfilepaths[self.path].fileobject + self.spawn = {} + self.antecedent = os.path.dirname(self.path) + + def setContent(self, contentstring): + # This method rewrites the data in the file that corresponds to its path + # name whether it preexisted or not. + self.fileobject = MockFileObject(contentstring) + self.existence = True + self.mockedfilepaths[self.path].fileobject = self.fileobject + self.mockedfilepaths[self.path].existence = self.existence + self.setparents() + + def create(self): + # This method chokes if there's a pre-existing file! + if self.mockedfilepaths[self.path].fileobject: + raise OSError + else: + self.existence = True + self.mockedfilepaths[self.path].fileobject = self.fileobject + self.mockedfilepaths[self.path].existence = self.existence + self.setparents() + + def open(self, mode='r'): + # XXX Makes no use of mode. + if not self.mockedfilepaths[self.path].fileobject: + # If there's no fileobject there already then make one and put it there. + self.fileobject = MockFileObject() + self.existence = True + self.mockedfilepaths[self.path].fileobject = self.fileobject + self.mockedfilepaths[self.path].existence = self.existence + else: + # Otherwise get a ref to it. 
+ self.fileobject = self.mockedfilepaths[self.path].fileobject + self.existence = self.mockedfilepaths[self.path].existence + return self.fileobject.open(mode) + + def child(self, childstring): + arg2child = os.path.join(self.path, childstring) + child = MockFilePath(arg2child, self.mockedfilepaths) + return child + + def children(self): + childrenfromffs = [ffp for ffp in self.mockedfilepaths.values() if ffp.path.startswith(self.path)] + childrenfromffs = [ffp for ffp in childrenfromffs if not ffp.path.endswith(self.path)] + childrenfromffs = [ffp for ffp in childrenfromffs if ffp.exists()] + self.spawn = frozenset(childrenfromffs) + return self.spawn + + def parent(self): + if self.mockedfilepaths.has_key(self.antecedent): + parent = self.mockedfilepaths[self.antecedent] + else: + parent = MockFilePath(self.antecedent, self.mockedfilepaths) + return parent + + def parents(self): + antecedents = [] + def f(fps, antecedents): + newfps = os.path.split(fps)[0] + if newfps: + antecedents.append(newfps) + f(newfps, antecedents) + f(self.path, antecedents) + return antecedents + + def setparents(self): + for fps in self.parents(): + if not self.mockedfilepaths.has_key(fps): + self.mockedfilepaths[fps] = MockFilePath(fps, self.mockedfilepaths, exists=True) + + def basename(self): + return os.path.split(self.path)[1] + + def moveTo(self, newffp): + # XXX Makes no distinction between file and directory arguments, this is deviation from filepath.moveTo + if self.mockedfilepaths[newffp.path].exists(): + raise OSError + else: + self.mockedfilepaths[newffp.path] = self + self.path = newffp.path + + def getsize(self): + return self.fileobject.getsize() + + def exists(self): + return self.existence + + def isdir(self): + return True + + def makedirs(self): + # XXX These methods assume that fp_ functions in fileutil will be tested elsewhere! 
+ pass + + def remove(self): + pass + + +class MockFileObject: + def __init__(self, contentstring=''): + self.buffer = contentstring + self.pos = 0 + def open(self, mode='r'): + return self + def write(self, instring): + begin = self.pos + padlen = begin - len(self.buffer) + if padlen > 0: + self.buffer += '\x00' * padlen + end = self.pos + len(instring) + self.buffer = self.buffer[:begin]+instring+self.buffer[end:] + self.pos = end + def close(self): + self.pos = 0 + def seek(self, pos): + self.pos = pos + def read(self, numberbytes): + return self.buffer[self.pos:self.pos+numberbytes] + def tell(self): + return self.pos + def size(self): + # XXX This method A: Is not to be found in a real file B: Is part of a wild-mung-up of filepath.stat! + # XXX Finally we shall hopefully use a getsize method soon, must consult first though. + # Hmmm... perhaps we need to sometimes stat the address when there's not a mockfileobject present? + return {stat.ST_SIZE:len(self.buffer)} + def getsize(self): + return len(self.buffer) + +class MockBCC: + def setServiceParent(self, Parent): + pass + + +class MockLCC: + def setServiceParent(self, Parent): + pass + + +class TestServerWithNullBackend(unittest.TestCase, ReallyEqualMixin): + """ NullBackend is just for testing and executable documentation, so + this test is actually a test of StorageServer in which we're using + NullBackend as helper code for the test, rather than a test of + NullBackend. """ + def setUp(self): + self.ss = StorageServer(testnodeid, NullBackend()) + + @mock.patch('os.mkdir') + @mock.patch('__builtin__.open') + @mock.patch('os.listdir') + @mock.patch('os.path.isdir') + def test_write_share(self, mockisdir, mocklistdir, mockopen, mockmkdir): + """ + Write a new share. This tests that StorageServer's remote_allocate_buckets + generates the correct return types when given test-vector arguments. That + bs is of the correct type is verified by attempting to invoke remote_write + on bs[0]. 
+ """ + alreadygot, bs = self.ss.remote_allocate_buckets('teststorage_index', 'x'*32, 'y'*32, set((0,)), 1, mock.Mock()) + bs[0].remote_write(0, 'a') + self.failIf(mockisdir.called) + self.failIf(mocklistdir.called) + self.failIf(mockopen.called) + self.failIf(mockmkdir.called) + + +class TestServerConstruction(MockFileSystem, ReallyEqualMixin): + def test_create_server_disk_backend(self): + """ This tests whether a server instance can be constructed with a + filesystem backend. To pass the test, it mustn't use the filesystem + outside of its configured storedir. """ + StorageServer(testnodeid, DiskBackend(self.storedir)) + + +class TestServerAndDiskBackend(MockFileSystem, ReallyEqualMixin): + """ This tests both the StorageServer and the Disk backend together. """ + def setUp(self): + MockFileSystem.setUp(self) + try: + self.backend = DiskBackend(self.storedir) + self.ss = StorageServer(testnodeid, self.backend) + + self.backendwithreserve = DiskBackend(self.storedir, reserved_space = 1) + self.sswithreserve = StorageServer(testnodeid, self.backendwithreserve) + except: + MockFileSystem.tearDown(self) + raise + + @mock.patch('time.time') + @mock.patch('allmydata.util.fileutil.get_available_space') + def test_out_of_space(self, mockget_available_space, mocktime): + mocktime.return_value = 0 + + def call_get_available_space(dir, reserve): + return 0 + + mockget_available_space.side_effect = call_get_available_space + alreadygotc, bsc = self.sswithreserve.remote_allocate_buckets('teststorage_index', 'x'*32, 'y'*32, set((0,)), 1, mock.Mock()) + self.failUnlessReallyEqual(bsc, {}) + + @mock.patch('time.time') + def test_write_and_read_share(self, mocktime): + """ + Write a new share, read it, and test the server's (and disk backend's) + handling of simultaneous and successive attempts to write the same + share. + """ + mocktime.return_value = 0 + # Inspect incoming and fail unless it's empty. 
+ incomingset = self.ss.backend.get_incoming_shnums('teststorage_index') + + self.failUnlessReallyEqual(incomingset, frozenset()) + + # Populate incoming with the sharenum: 0. + alreadygot, bs = self.ss.remote_allocate_buckets('teststorage_index', 'x'*32, 'y'*32, frozenset((0,)), 1, mock.Mock()) + + # This is a transparent-box test: Inspect incoming and fail unless the sharenum: 0 is listed there. + self.failUnlessReallyEqual(self.ss.backend.get_incoming_shnums('teststorage_index'), frozenset((0,))) + + + + # Attempt to create a second share writer with the same sharenum. + alreadygota, bsa = self.ss.remote_allocate_buckets('teststorage_index', 'x'*32, 'y'*32, frozenset((0,)), 1, mock.Mock()) + + # Show that no sharewriter results from a remote_allocate_buckets + # with the same si and sharenum, until BucketWriter.remote_close() + # has been called. + self.failIf(bsa) + + # Test allocated size. + spaceint = self.ss.allocated_size() + self.failUnlessReallyEqual(spaceint, 1) + + # Write 'a' to shnum 0. Only tested together with close and read. + bs[0].remote_write(0, 'a') + + # Preclose: Inspect final, failUnless nothing there. + self.failUnlessReallyEqual(len(list(self.backend.get_shares('teststorage_index'))), 0) + bs[0].remote_close() + + # Postclose: (Omnibus) failUnless written data is in final. + sharesinfinal = list(self.backend.get_shares('teststorage_index')) + self.failUnlessReallyEqual(len(sharesinfinal), 1) + contents = sharesinfinal[0].read_share_data(0, 73) + self.failUnlessReallyEqual(contents, client_data) + + # Exercise the case that the share we're asking to allocate is + # already (completely) uploaded. + self.ss.remote_allocate_buckets('teststorage_index', 'x'*32, 'y'*32, set((0,)), 1, mock.Mock()) + + + def test_read_old_share(self): + """ This tests whether the code correctly finds and reads + shares written out by old (Tahoe-LAFS <= v1.8.2) + servers. 
There is a similar test in test_download, but that one + is from the perspective of the client and exercises a deeper + stack of code. This one is for exercising just the + StorageServer object. """ + # Construct a file with the appropriate contents in the mockfilesystem. + datalen = len(share_data) + finalhome = si_si2dir(self.basedir, 'teststorage_index').child(str(0)) + finalhome.setContent(share_data) + + # Now begin the test. + bs = self.ss.remote_get_buckets('teststorage_index') + + self.failUnlessEqual(len(bs), 1) + b = bs['0'] + # These should match by definition, the next two cases cover cases without (completely) unambiguous behaviors. + self.failUnlessReallyEqual(b.remote_read(0, datalen), client_data) + # If you try to read past the end you get as much data as is there. + self.failUnlessReallyEqual(b.remote_read(0, datalen+20), client_data) + # If you start reading past the end of the file you get the empty string. + self.failUnlessReallyEqual(b.remote_read(datalen+1, 3), '') hunk ./src/allmydata/test/test_mutable.py 6 from cStringIO import StringIO from twisted.trial import unittest from twisted.internet import defer, reactor -from twisted.internet.interfaces import IConsumer -from zope.interface import implements from allmydata import uri, client from allmydata.nodemaker import NodeMaker from allmydata.util import base32, consumer, fileutil, mathutil hunk ./src/allmydata/test/test_storage.py 3289 def test_expire_age(self): basedir = "storage/LeaseCrawler/expire_age" fileutil.make_dirs(basedir) - # setting expiration_time to 2000 means that any lease which is more - # than 2000s old will be expired. - ss = InstrumentedStorageServer(basedir, "\x00" * 20, - expiration_enabled=True, - expiration_mode="age", - expiration_override_lease_duration=2000) + # setting 'override_lease_duration' to 2000 means that any lease that + # is more than 2000 seconds old will be expired.
+ expiration_policy = { + 'enabled': True, + 'mode': 'age', + 'override_lease_duration': 2000, + 'sharetypes': ('mutable', 'immutable'), + } + ss = InstrumentedStorageServer(basedir, "\x00" * 20, expiration_policy) # make it start sooner than usual. lc = ss.lease_checker lc.slow_start = 0 hunk ./src/allmydata/test/test_storage.py 3430 def test_expire_cutoff_date(self): basedir = "storage/LeaseCrawler/expire_cutoff_date" fileutil.make_dirs(basedir) - # setting cutoff-date to 2000 seconds ago means that any lease which - # is more than 2000s old will be expired. + # setting 'cutoff_date' to 2000 seconds ago means that any lease that + # is more than 2000 seconds old will be expired. now = time.time() then = int(now - 2000) hunk ./src/allmydata/test/test_storage.py 3434 - ss = InstrumentedStorageServer(basedir, "\x00" * 20, - expiration_enabled=True, - expiration_mode="cutoff-date", - expiration_cutoff_date=then) + expiration_policy = { + 'enabled': True, + 'mode': 'cutoff-date', + 'cutoff_date': then, + 'sharetypes': ('mutable', 'immutable'), + } + ss = InstrumentedStorageServer(basedir, "\x00" * 20, expiration_policy) # make it start sooner than usual. lc = ss.lease_checker lc.slow_start = 0 hunk ./src/allmydata/test/test_storage.py 3582 def test_only_immutable(self): basedir = "storage/LeaseCrawler/only_immutable" fileutil.make_dirs(basedir) + # setting 'cutoff_date' to 2000 seconds ago means that any lease that + # is more than 2000 seconds old will be expired. 
now = time.time() then = int(now - 2000) hunk ./src/allmydata/test/test_storage.py 3586 - ss = StorageServer(basedir, "\x00" * 20, - expiration_enabled=True, - expiration_mode="cutoff-date", - expiration_cutoff_date=then, - expiration_sharetypes=("immutable",)) + expiration_policy = { + 'enabled': True, + 'mode': 'cutoff-date', + 'cutoff_date': then, + 'sharetypes': ('immutable',), + } + ss = StorageServer(basedir, "\x00" * 20, expiration_policy) lc = ss.lease_checker lc.slow_start = 0 webstatus = StorageStatus(ss) hunk ./src/allmydata/test/test_storage.py 3643 def test_only_mutable(self): basedir = "storage/LeaseCrawler/only_mutable" fileutil.make_dirs(basedir) + # setting 'cutoff_date' to 2000 seconds ago means that any lease that + # is more than 2000 seconds old will be expired. now = time.time() then = int(now - 2000) hunk ./src/allmydata/test/test_storage.py 3647 - ss = StorageServer(basedir, "\x00" * 20, - expiration_enabled=True, - expiration_mode="cutoff-date", - expiration_cutoff_date=then, - expiration_sharetypes=("mutable",)) + expiration_policy = { + 'enabled': True, + 'mode': 'cutoff-date', + 'cutoff_date': then, + 'sharetypes': ('mutable',), + } + ss = StorageServer(basedir, "\x00" * 20, expiration_policy) lc = ss.lease_checker lc.slow_start = 0 webstatus = StorageStatus(ss) hunk ./src/allmydata/test/test_storage.py 3826 def test_no_st_blocks(self): basedir = "storage/LeaseCrawler/no_st_blocks" fileutil.make_dirs(basedir) - ss = No_ST_BLOCKS_StorageServer(basedir, "\x00" * 20, - expiration_mode="age", - expiration_override_lease_duration=-1000) - # a negative expiration_time= means the "configured-" + # A negative 'override_lease_duration' means that the "configured-" # space-recovered counts will be non-zero, since all shares will have hunk ./src/allmydata/test/test_storage.py 3828 - # expired by then + # expired by then. 
+ expiration_policy = { + 'enabled': True, + 'mode': 'age', + 'override_lease_duration': -1000, + 'sharetypes': ('mutable', 'immutable'), + } + ss = No_ST_BLOCKS_StorageServer(basedir, "\x00" * 20, expiration_policy) # make it start sooner than usual. lc = ss.lease_checker hunk ./src/allmydata/util/encodingutil.py 221 def quote_path(path, quotemarks=True): return quote_output("/".join(map(to_str, path)), quotemarks=quotemarks) +def quote_filepath(fp, quotemarks=True, encoding=None): + path = fp.path + if isinstance(path, str): + try: + path = path.decode(filesystem_encoding) + except UnicodeDecodeError: + return 'b"%s"' % (ESCAPABLE_8BIT.sub(_str_escape, path),) + + return quote_output(path, quotemarks=quotemarks, encoding=encoding) + def unicode_platform(): """ hunk ./src/allmydata/util/fileutil.py 5 Futz with files like a pro. """ -import sys, exceptions, os, stat, tempfile, time, binascii +import errno, sys, exceptions, os, stat, tempfile, time, binascii + +from allmydata.util.assertutil import precondition from twisted.python import log hunk ./src/allmydata/util/fileutil.py 10 +from twisted.python.filepath import FilePath, UnlistableError from pycryptopp.cipher.aes import AES hunk ./src/allmydata/util/fileutil.py 189 raise tx raise exceptions.IOError, "unknown error prevented creation of directory, or deleted the directory immediately after creation: %s" % dirname # careful not to construct an IOError with a 2-tuple, as that has a special meaning... -def rm_dir(dirname): +def fp_make_dirs(dirfp): + """ + An idempotent version of FilePath.makedirs(). If the dir already + exists, do nothing and return without raising an exception. If this + call creates the dir, return without raising an exception. If there is + an error that prevents creation or if the directory gets deleted after + fp_make_dirs() creates it and before fp_make_dirs() checks that it + exists, raise an exception. 
+ """ + log.msg( "xxx 0 %s" % (dirfp,)) + tx = None + try: + dirfp.makedirs() + except OSError, x: + tx = x + + if not dirfp.isdir(): + if tx: + raise tx + raise exceptions.IOError, "unknown error prevented creation of directory, or deleted the directory immediately after creation: %s" % dirfp # careful not to construct an IOError with a 2-tuple, as that has a special meaning... + +def fp_rmdir_if_empty(dirfp): + """ Remove the directory if it is empty. """ + try: + os.rmdir(dirfp.path) + except OSError, e: + if e.errno != errno.ENOTEMPTY: + raise + else: + dirfp.changed() + +def rmtree(dirname): """ A threadsafe and idempotent version of shutil.rmtree(). If the dir is already gone, do nothing and return without raising an exception. If this hunk ./src/allmydata/util/fileutil.py 239 else: remove(fullname) os.rmdir(dirname) - except Exception, le: - # Ignore "No such file or directory" - if (not isinstance(le, OSError)) or le.args[0] != 2: + except EnvironmentError, le: + # Ignore "No such file or directory", collect any other exception. + if (le.args[0] != 2 and le.args[0] != 3) or (le.args[0] != errno.ENOENT): excs.append(le) hunk ./src/allmydata/util/fileutil.py 243 + except Exception, le: + excs.append(le) # Okay, now we've recursively removed everything, ignoring any "No # such file or directory" errors, and collecting any other errors. hunk ./src/allmydata/util/fileutil.py 256 raise OSError, "Failed to remove dir for unknown reason." raise OSError, excs +def fp_remove(dirfp): + """ + An idempotent version of shutil.rmtree(). If the dir is already gone, + do nothing and return without raising an exception. If this call + removes the dir, return without raising an exception. If there is an + error that prevents removal or if the directory gets created again by + someone else after this deletes it and before this checks that it is + gone, raise an exception. 
+ """ + try: + dirfp.remove() + except UnlistableError, e: + if e.originalException.errno != errno.ENOENT: + raise + except OSError, e: + if e.errno != errno.ENOENT: + raise + +def rm_dir(dirname): + # Renamed to be like shutil.rmtree and unlike rmdir. + return rmtree(dirname) def remove_if_possible(f): try: hunk ./src/allmydata/util/fileutil.py 387 import traceback traceback.print_exc() -def get_disk_stats(whichdir, reserved_space=0): +def get_disk_stats(whichdirfp, reserved_space=0): """Return disk statistics for the storage disk, in the form of a dict with the following fields. total: total bytes on disk hunk ./src/allmydata/util/fileutil.py 408 you can pass how many bytes you would like to leave unused on this filesystem as reserved_space. """ + precondition(isinstance(whichdirfp, FilePath), whichdirfp) if have_GetDiskFreeSpaceExW: # If this is a Windows system and GetDiskFreeSpaceExW is available, use it. hunk ./src/allmydata/util/fileutil.py 419 n_free_for_nonroot = c_ulonglong(0) n_total = c_ulonglong(0) n_free_for_root = c_ulonglong(0) - retval = GetDiskFreeSpaceExW(whichdir, byref(n_free_for_nonroot), + retval = GetDiskFreeSpaceExW(whichdirfp.path, byref(n_free_for_nonroot), byref(n_total), byref(n_free_for_root)) if retval == 0: hunk ./src/allmydata/util/fileutil.py 424 raise OSError("Windows error %d attempting to get disk statistics for %r" - % (GetLastError(), whichdir)) + % (GetLastError(), whichdirfp.path)) free_for_nonroot = n_free_for_nonroot.value total = n_total.value free_for_root = n_free_for_root.value hunk ./src/allmydata/util/fileutil.py 433 # # # - s = os.statvfs(whichdir) + s = os.statvfs(whichdirfp.path) # on my mac laptop: # statvfs(2) is a wrapper around statfs(2). hunk ./src/allmydata/util/fileutil.py 460 'avail': avail, } -def get_available_space(whichdir, reserved_space): +def get_available_space(whichdirfp, reserved_space): """Returns available space for share storage in bytes, or None if no API to get this information is available. 
hunk ./src/allmydata/util/fileutil.py 472 you can pass how many bytes you would like to leave unused on this filesystem as reserved_space. """ + precondition(isinstance(whichdirfp, FilePath), whichdirfp) try: hunk ./src/allmydata/util/fileutil.py 474 - return get_disk_stats(whichdir, reserved_space)['avail'] + return get_disk_stats(whichdirfp, reserved_space)['avail'] except AttributeError: return None hunk ./src/allmydata/util/fileutil.py 477 - except EnvironmentError: - log.msg("OS call to get disk statistics failed") + + +def get_used_space(fp): + if fp is None: return 0 hunk ./src/allmydata/util/fileutil.py 482 + try: + s = os.stat(fp.path) + except EnvironmentError: + if not fp.exists(): + return 0 + raise + else: + # POSIX defines st_blocks (originally a BSDism): + # + # but does not require stat() to give it a "meaningful value" + # + # and says: + # "The unit for the st_blocks member of the stat structure is not defined + # within IEEE Std 1003.1-2001. In some implementations it is 512 bytes. + # It may differ on a file system basis. There is no correlation between + # values of the st_blocks and st_blksize, and the f_bsize (from ) + # structure members." + # + # The Linux docs define it as "the number of blocks allocated to the file, + # [in] 512-byte units." It is also defined that way on MacOS X. Python does + # not set the attribute on Windows. + # + # We consider platforms that define st_blocks but give it a wrong value, or + # measure it in a unit other than 512 bytes, to be broken. See also + # . + + if hasattr(s, 'st_blocks'): + return s.st_blocks * 512 + else: + return s.st_size } Context: [tests: bump up the timeout in this test that fails on FreeStorm's CentOS in order to see if it is just very slow zooko@zooko.com**20110913024255 Ignore-this: 6a86d691e878cec583722faad06fb8e4 ] [interfaces: document that the 'fills-holes-with-zero-bytes' key should be used to detect whether a storage server has that behavior. 
refs #1528 david-sarah@jacaranda.org**20110913002843 Ignore-this: 1a00a6029d40f6792af48c5578c1fd69 ] [CREDITS: more CREDITS for Kevan and David-Sarah zooko@zooko.com**20110912223357 Ignore-this: 4ea8f0d6f2918171d2f5359c25ad1ada ] [merge NEWS about the mutable file bounds fixes with NEWS about work-in-progress zooko@zooko.com**20110913205521 Ignore-this: 4289a4225f848d6ae6860dd39bc92fa8 ] [doc: add NEWS item about fixes to potential palimpsest issues in mutable files zooko@zooko.com**20110912223329 Ignore-this: 9d63c95ddf95c7d5453c94a1ba4d406a ref. #1528 ] [merge the NEWS about the security fix (#1528) with the work-in-progress NEWS zooko@zooko.com**20110913205153 Ignore-this: 88e88a2ad140238c62010cf7c66953fc ] [doc: add NEWS entry about the issue which allows unauthorized deletion of shares zooko@zooko.com**20110912223246 Ignore-this: 77e06d09103d2ef6bb51ea3e5d6e80b0 ref. #1528 ] [doc: add entry in known_issues.rst about the issue which allows unauthorized deletion of shares zooko@zooko.com**20110912223135 Ignore-this: b26c6ea96b6c8740b93da1f602b5a4cd ref. #1528 ] [storage: more paranoid handling of bounds and palimpsests in mutable share files zooko@zooko.com**20110912222655 Ignore-this: a20782fa423779ee851ea086901e1507 * storage server ignores requests to extend shares by sending a new_length * storage server fills exposed holes (created by sending a write vector whose offset begins after the end of the current data) with 0 to avoid "palimpsest" exposure of previous contents * storage server zeroes out lease info at the old location when moving it to a new location ref. #1528 ] [storage: test that the storage server ignores requests to extend shares by sending a new_length, and that the storage server fills exposed holes with 0 to avoid "palimpsest" exposure of previous contents zooko@zooko.com**20110912222554 Ignore-this: 61ebd7b11250963efdf5b1734a35271 ref. 
#1528 ] [immutable: prevent clients from reading past the end of share data, which would allow them to learn the cancellation secret zooko@zooko.com**20110912222458 Ignore-this: da1ebd31433ea052087b75b2e3480c25 Declare explicitly that we prevent this problem in the server's version dict. fixes #1528 (there are two patches that are each a sufficient fix to #1528 and this is one of them) ] [storage: remove the storage server's "remote_cancel_lease" function zooko@zooko.com**20110912222331 Ignore-this: 1c32dee50e0981408576daffad648c50 We're removing this function because it is currently unused, because it is dangerous, and because the bug described in #1528 leaks the cancellation secret, which allows anyone who knows a file's storage index to abuse this function to delete shares of that file. fixes #1528 (there are two patches that are each a sufficient fix to #1528 and this is one of them) ] [storage: test that the storage server does *not* have a "remote_cancel_lease" function zooko@zooko.com**20110912222324 Ignore-this: 21c652009704652d35f34651f98dd403 We're removing this function because it is currently unused, because it is dangerous, and because the bug described in #1528 leaks the cancellation secret, which allows anyone who knows a file's storage index to abuse this function to delete shares of that file. ref. #1528 ] [immutable: test whether the server allows clients to read past the end of share data, which would allow them to learn the cancellation secret zooko@zooko.com**20110912221201 Ignore-this: 376e47b346c713d37096531491176349 Also test whether the server explicitly declares that it prevents this problem. 
ref #1528 ] [Retrieve._activate_enough_peers: rewrite Verify logic Brian Warner **20110909181150 Ignore-this: 9367c11e1eacbf025f75ce034030d717 ] [Retrieve: implement/test stopProducing Brian Warner **20110909181150 Ignore-this: 47b2c3df7dc69835e0a066ca12e3c178 ] [move DownloadStopped from download.common to interfaces Brian Warner **20110909181150 Ignore-this: 8572acd3bb16e50341dbed8eb1d90a50 ] [retrieve.py: remove vestigal self._validated_readers Brian Warner **20110909181150 Ignore-this: faab2ec14e314a53a2ffb714de626e2d ] [Retrieve: rewrite flow-control: use a top-level loop() to catch all errors Brian Warner **20110909181150 Ignore-this: e162d2cd53b3d3144fc6bc757e2c7714 This ought to close the potential for dropped errors and hanging downloads. Verify needs to be examined, I may have broken it, although all tests pass. ] [Retrieve: merge _validate_active_prefixes into _add_active_peers Brian Warner **20110909181150 Ignore-this: d3ead31e17e69394ae7058eeb5beaf4c ] [Retrieve: remove the initial prefix-is-still-good check Brian Warner **20110909181150 Ignore-this: da66ee51c894eaa4e862e2dffb458acc This check needs to be done with each fetch from the storage server, to detect when someone has changed the share (i.e. our servermap goes stale). Doing it just once at the beginning of retrieve isn't enough: a write might occur after the first segment but before the second, etc. _try_to_validate_prefix() was not removed: it will be used by the future check-with-each-fetch code. test_mutable.Roundtrip.test_corrupt_all_seqnum_late was disabled, since it fails until this check is brought back. (the corruption it applies only touches the prefix, not the block data, so the check-less retrieve actually tolerates it). Don't forget to re-enable it once the check is brought back. 
] [MDMFSlotReadProxy: remove the queue Brian Warner **20110909181150 Ignore-this: 96673cb8dda7a87a423de2f4897d66d2 This is a neat trick to reduce Foolscap overhead, but the need for an explicit flush() complicates the Retrieve path and makes it prone to lost-progress bugs. Also change test_mutable.FakeStorageServer to tolerate multiple reads of the same share in a row, a limitation exposed by turning off the queue. ] [rearrange Retrieve: first step, shouldn't change order of execution Brian Warner **20110909181149 Ignore-this: e3006368bfd2802b82ea45c52409e8d6 ] [CLI: test_cli.py -- remove an unnecessary call in test_mkdir_mutable_type. refs #1527 david-sarah@jacaranda.org**20110906183730 Ignore-this: 122e2ffbee84861c32eda766a57759cf ] [CLI: improve test for 'tahoe mkdir --mutable-type='. refs #1527 david-sarah@jacaranda.org**20110906183020 Ignore-this: f1d4598e6c536f0a2b15050b3bc0ef9d ] [CLI: make the --mutable-type option value for 'tahoe put' and 'tahoe mkdir' case-insensitive, and change --help for these commands accordingly. fixes #1527 david-sarah@jacaranda.org**20110905020922 Ignore-this: 75a6df0a2df9c467d8c010579e9a024e ] [cli: make --mutable-type imply --mutable in 'tahoe put' Kevan Carstensen **20110903190920 Ignore-this: 23336d3c43b2a9554e40c2a11c675e93 ] [SFTP: add a comment about a subtle interaction between OverwriteableFileConsumer and GeneralSFTPFile, and test the case it is commenting on. david-sarah@jacaranda.org**20110903222304 Ignore-this: 980c61d4dd0119337f1463a69aeebaf0 ] [improve the storage/mutable.py asserts even more warner@lothar.com**20110901160543 Ignore-this: 5b2b13c49bc4034f96e6e3aaaa9a9946 ] [storage/mutable.py: special characters in struct.foo arguments indicate standard as opposed to native sizes, we should be using these characters in these asserts wilcoxjg@gmail.com**20110901084144 Ignore-this: 28ace2b2678642e4d7269ddab8c67f30 ] [docs/write_coordination.rst: fix formatting and add more specific warning about access via sshfs. 
david-sarah@jacaranda.org**20110831232148 Ignore-this: cd9c851d3eb4e0a1e088f337c291586c ] [test_mutable.Version: consolidate some tests, reduce runtime from 19s to 15s warner@lothar.com**20110831050451 Ignore-this: 64815284d9e536f8f3798b5f44cf580c ] [mutable/retrieve: handle the case where self._read_length is 0. Kevan Carstensen **20110830210141 Ignore-this: fceafbe485851ca53f2774e5a4fd8d30 Note that the downloader will still fetch a segment for a zero-length read, which is wasteful. Fixing that isn't specifically required to fix #1512, but it should probably be fixed before 1.9. ] [NEWS: added summary of all changes since 1.8.2. Needs editing. Brian Warner **20110830163205 Ignore-this: 273899b37a899fc6919b74572454b8b2 ] [test_mutable.Update: only upload the files needed for each test. refs #1500 Brian Warner **20110829072717 Ignore-this: 4d2ab4c7523af9054af7ecca9c3d9dc7 This first step shaves 15% off the runtime: from 139s to 119s on my laptop. It also fixes a couple of places where a Deferred was being dropped, which would cause two tests to run in parallel and also confuse error reporting. ] [Let Uploader retain History instead of passing it into upload(). Fixes #1079. Brian Warner **20110829063246 Ignore-this: 3902c58ec12bd4b2d876806248e19f17 This consistently records all immutable uploads in the Recent Uploads And Downloads page, regardless of code path. Previously, certain webapi upload operations (like PUT /uri/$DIRCAP/newchildname) failed to pass the History object and were left out. ] [Fix mutable publish/retrieve timing status displays. Fixes #1505. 
Brian Warner **20110828232221 Ignore-this: 4080ce065cf481b2180fd711c9772dd6 publish: * encrypt and encode times are cumulative, not just current-segment retrieve: * same for decrypt and decode times * update "current status" to include segment number * set status to Finished/Failed when download is complete * set progress to 1.0 when complete More improvements to consider: * progress is currently 0% or 100%: should calculate how many segments are involved (remembering retrieve can be less than the whole file) and set it to a fraction * "fetch" time is fuzzy: what we want is to know how much of the delay is not our own fault, but since we do decode/decrypt work while waiting for more shares, it's not straightforward ] [Teach 'tahoe debug catalog-shares about MDMF. Closes #1507. Brian Warner **20110828080931 Ignore-this: 56ef2951db1a648353d7daac6a04c7d1 ] [debug.py: remove some dead comments Brian Warner **20110828074556 Ignore-this: 40e74040dd4d14fd2f4e4baaae506b31 ] [hush pyflakes Brian Warner **20110828074254 Ignore-this: bef9d537a969fa82fe4decc4ba2acb09 ] [MutableFileNode.set_downloader_hints: never depend upon order of dict.values() Brian Warner **20110828074103 Ignore-this: caaf1aa518dbdde4d797b7f335230faa The old code was calculating the "extension parameters" (a list) from the downloader hints (a dictionary) with hints.values(), which is not stable, and would result in corrupted filecaps (with the 'k' and 'segsize' hints occasionally swapped). The new code always uses [k,segsize]. ] [layout.py: fix MDMF share layout documentation Brian Warner **20110828073921 Ignore-this: 3f13366fed75b5e31b51ae895450a225 ] [teach 'tahoe debug dump-share' about MDMF and offsets. 
refs #1507 Brian Warner **20110828073834 Ignore-this: 3a9d2ef9c47a72bf1506ba41199a1dea ] [test_mutable.Version.test_debug: use splitlines() to fix buildslaves Brian Warner **20110828064728 Ignore-this: c7f6245426fc80b9d1ae901d5218246a Any slave running in a directory with spaces in the name was miscounting shares, causing the test to fail. ] [test_mutable.Version: exercise 'tahoe debug find-shares' on MDMF. refs #1507 Brian Warner **20110828005542 Ignore-this: cb20bea1c28bfa50a72317d70e109672 Also changes NoNetworkGrid to put shares in storage/shares/ . ] [test_mutable.py: oops, missed a .todo Brian Warner **20110828002118 Ignore-this: fda09ae86481352b7a627c278d2a3940 ] [test_mutable: merge davidsarah's patch with my Version refactorings warner@lothar.com**20110827235707 Ignore-this: b5aaf481c90d99e33827273b5d118fd0 ] [Make the immutable/read-only constraint checking for MDMF URIs identical to that for SSK URIs. refs #393 david-sarah@jacaranda.org**20110823012720 Ignore-this: e1f59d7ff2007c81dbef2aeb14abd721 ] [Additional tests for MDMF URIs and for zero-length files. refs #393 david-sarah@jacaranda.org**20110823011532 Ignore-this: a7cc0c09d1d2d72413f9cd227c47a9d5 ] [Additional tests for zero-length partial reads and updates to mutable versions. refs #393 david-sarah@jacaranda.org**20110822014111 Ignore-this: 5fc6f4d06e11910124e4a277ec8a43ea ] [test_mutable.Version: factor out some expensive uploads, save 25% runtime Brian Warner **20110827232737 Ignore-this: ea37383eb85ea0894b254fe4dfb45544 ] [SDMF: update filenode with correct k/N after Retrieve. Fixes #1510. Brian Warner **20110827225031 Ignore-this: b50ae6e1045818c400079f118b4ef48 Without this, we get a regression when modifying a mutable file that was created with more shares (larger N) than our current tahoe.cfg . 
The modification attempt creates new versions of the (0,1,..,newN-1) shares, but leaves the old versions of the (newN,..,oldN-1) shares alone (and throws a assertion error in SDMFSlotWriteProxy.finish_publishing in the process). The mixed versions that result (some shares with e.g. N=10, some with N=20, such that both versions are recoverable) cause problems for the Publish code, even before MDMF landed. Might be related to refs #1390 and refs #1042. ] [layout.py: annotate assertion to figure out 'tahoe backup' failure Brian Warner **20110827195253 Ignore-this: 9b92b954e3ed0d0f80154fff1ff674e5 ] [Add 'tahoe debug dump-cap' support for MDMF, DIR2-CHK, DIR2-MDMF. refs #1507. Brian Warner **20110827195048 Ignore-this: 61c6af5e33fc88e0251e697a50addb2c This also adds tests for all those cases, and fixes an omission in uri.py that broke parsing of DIR2-MDMF-Verifier and DIR2-CHK-Verifier. ] [MDMF: more writable/writeable consistentifications warner@lothar.com**20110827190602 Ignore-this: 22492a9e20c1819ddb12091062888b55 ] [MDMF: s/Writable/Writeable/g, for consistency with existing SDMF code warner@lothar.com**20110827183357 Ignore-this: 9dd312acedbdb2fc2f7bef0d0fb17c0b ] [setup.cfg: remove no-longer-supported test_mac_diskimage alias. refs #1479 david-sarah@jacaranda.org**20110826230345 Ignore-this: 40e908b8937322a290fb8012bfcad02a ] [test_mutable.Update: increase timeout from 120s to 400s, slaves are failing Brian Warner **20110825230140 Ignore-this: 101b1924a30cdbda9b2e419e95ca15ec ] [tests: fix check_memory test zooko@zooko.com**20110825201116 Ignore-this: 4d66299fa8cb61d2ca04b3f45344d835 fixes #1503 ] [TAG allmydata-tahoe-1.9.0a1 warner@lothar.com**20110825161122 Ignore-this: 3cbf49f00dbda58189f893c427f65605 ] Patch bundle hash: 4859218357a7434f768d619e8e0213c9879886e8