# Review notes. Text ahead of the first "diff" header is not part of any hunk.
#
# Layout: this patch arrived with its line breaks and indentation collapsed.
# The line structure below was rebuilt from the hunk line counts and from
# Python syntax. Indentation depth and the position of blank context lines
# are best guesses -- TODO confirm against the tree before applying.
#
# The _copy_share hunk in test_hung_server.py removes and re-adds lines that
# are textually identical, so it was a whitespace-only change. The old-side
# whitespace cannot be recovered here; the '-' lines are shown with the new
# indentation and will need whitespace-insensitive matching (or regeneration).
#
# Changes relative to the submitted patch (hunk headers recomputed):
#  * download.py: the unit-test hook _stage_4_d is fired at most once.
#  * no_network.py: hang_server() no longer uses a Deferred built in the
#    'def' line as its default; break_server_xyz() delegates to
#    break_server() instead of duplicating it.
#  * test_hung_server.py: debug print statements removed; _unhang() no longer
#    forwards keyword arguments that unhang_server() does not accept;
#    _corrupt_share() closes both file handles; test_failover_during_stage_4
#    hooks the unhang before waiting on the download and checks the
#    downloaded data rather than None.
#  * Left as submitted: the commented-out steps in
#    test_2_good_8_hung_then_1_recovers_with_2_shares, the unused
#    _wait_for_data(), and the "too ugly! FIXME" downloader lookup.
#
diff -rN -u old-tahoe/src/allmydata/immutable/download.py new-tahoe/src/allmydata/immutable/download.py
--- old-tahoe/src/allmydata/immutable/download.py	2010-02-01 03:34:57.260000000 +0000
+++ new-tahoe/src/allmydata/immutable/download.py	2010-02-01 03:35:01.431000000 +0000
@@ -809,6 +809,10 @@
         # self._responses_received = 0
         # self._queries_failed = 0
 
+        # This is solely for the use of unit tests. It will be triggered when
+        # we start downloading shares.
+        self._stage_4_d = defer.Deferred()
+
     def pauseProducing(self):
         if self._paused:
             return
@@ -1109,6 +1113,12 @@
         # this pause, at the end of write, prevents pre-fetch from
         # happening until the consumer is ready for more data.
         d.addCallback(self._check_for_pause)
+
+        # Fire the unit-test hook at most once: calling callback() on a
+        # Deferred that has already fired raises AlreadyCalledError.
+        # NOTE(review): presumably this code runs once per segment -- confirm.
+        if not self._stage_4_d.called:
+            self._stage_4_d.callback(None)
         return d
 
     def _check_for_pause(self, res):
diff -rN -u old-tahoe/src/allmydata/test/no_network.py new-tahoe/src/allmydata/test/no_network.py
--- old-tahoe/src/allmydata/test/no_network.py	2010-02-01 03:34:57.794000000 +0000
+++ new-tahoe/src/allmydata/test/no_network.py	2010-02-01 03:35:01.974000000 +0000
@@ -252,12 +252,31 @@
 
     def break_server(self, serverid):
         # mark the given server as broken, so it will throw exceptions when
-        # asked to hold a share
+        # asked to hold a share or serve a share
         self.servers_by_id[serverid].broken = True
 
+    def break_server_xyz(self, serverid):
+        # NOTE(review): same effect as break_server(); delegate instead of
+        # keeping a second copy of the body.
+        self.break_server(serverid)
+
-    def hang_server(self, serverid, until=defer.Deferred()):
-        # hang the given server until 'until' fires
-        self.servers_by_id[serverid].hung_until = until
+    def hang_server(self, serverid, until=None):
+        # hang the given server until 'until' fires. The default Deferred is
+        # created per call: one built in the 'def' line would be shared by
+        # every server hung without an explicit 'until', and a Deferred can
+        # only be fired once.
+        if until is None:
+            until = defer.Deferred()
+        ss = self.servers_by_id[serverid]
+        assert ss.hung_until is None
+        ss.hung_until = until
+
+    def unhang_server(self, serverid):
+        # unhang the given server
+        ss = self.servers_by_id[serverid]
+        assert ss.hung_until is not None
+        ss.hung_until.callback(None)
+        ss.hung_until = None
 
 
 class GridTestMixin:
diff -rN -u old-tahoe/src/allmydata/test/test_hung_server.py new-tahoe/src/allmydata/test/test_hung_server.py
--- old-tahoe/src/allmydata/test/test_hung_server.py	2010-02-01 03:34:58.030000000 +0000
+++ new-tahoe/src/allmydata/test/test_hung_server.py	2010-02-01 03:35:02.141000000 +0000
@@ -8,7 +8,7 @@
 from allmydata.mutable.common import UnrecoverableFileError
 from allmydata.storage.common import storage_index_to_dir
 from allmydata.test.no_network import GridTestMixin
-from allmydata.test.common import ShouldFailMixin
+from allmydata.test.common import ShouldFailMixin, _corrupt_share_data
 from allmydata.interfaces import NotEnoughSharesError
 
 immutable_plaintext = "data" * 10000
@@ -22,15 +22,25 @@
             self.g.break_server(id)
 
     def _hang(self, servers, **kwargs):
         for (id, ss) in servers:
             self.g.hang_server(id, **kwargs)
 
+    def _unhang(self, servers):
+        for (id, ss) in servers:
+            self.g.unhang_server(id)
+
     def _delete_all_shares_from(self, servers):
         serverids = [id for (id, ss) in servers]
         for (i_shnum, i_serverid, i_sharefile) in self.shares:
             if i_serverid in serverids:
                 os.unlink(i_sharefile)
 
+    def _corrupt_all_shares_in(self, servers, corruptor_func):
+        serverids = [id for (id, ss) in servers]
+        for (i_shnum, i_serverid, i_sharefile) in self.shares:
+            if i_serverid in serverids:
+                self._corrupt_share((i_shnum, i_sharefile), corruptor_func)
+
     def _copy_all_shares_from(self, from_servers, to_server):
         serverids = [id for (id, ss) in from_servers]
         for (i_shnum, i_serverid, i_sharefile) in self.shares:
@@ -38,19 +48,35 @@
                 self._copy_share((i_shnum, i_sharefile), to_server)
 
     def _copy_share(self, share, to_server):
-        (sharenum, sharefile) = share
-        (id, ss) = to_server
-        shares_dir = os.path.join(ss.original.storedir, "shares")
-        si = uri.from_string(self.uri).get_storage_index()
-        si_dir = os.path.join(shares_dir, storage_index_to_dir(si))
-        if not os.path.exists(si_dir):
-            os.makedirs(si_dir)
-        new_sharefile = os.path.join(si_dir, str(sharenum))
-        shutil.copy(sharefile, new_sharefile)
-        self.shares = self.find_shares(self.uri)
-        # Make sure that the storage server has the share.
-        self.failUnless((sharenum, ss.original.my_nodeid, new_sharefile)
-                        in self.shares)
+        (sharenum, sharefile) = share
+        (id, ss) = to_server
+        shares_dir = os.path.join(ss.original.storedir, "shares")
+        si = uri.from_string(self.uri).get_storage_index()
+        si_dir = os.path.join(shares_dir, storage_index_to_dir(si))
+        if not os.path.exists(si_dir):
+            os.makedirs(si_dir)
+        new_sharefile = os.path.join(si_dir, str(sharenum))
+        shutil.copy(sharefile, new_sharefile)
+        self.shares = self.find_shares(self.uri)
+        # Make sure that the storage server has the share.
+        self.failUnless((sharenum, ss.original.my_nodeid, new_sharefile)
+                        in self.shares)
+
+    def _corrupt_share(self, share, corruptor_func):
+        # Replace the contents of the share file with corruptor_func(data).
+        (sharenum, sharefile) = share
+        rf = open(sharefile, "rb")
+        try:
+            data = rf.read()
+        finally:
+            rf.close()
+        newdata = corruptor_func(data)
+        os.unlink(sharefile)
+        wf = open(sharefile, "wb")
+        try:
+            wf.write(newdata)
+        finally:
+            wf.close()
 
     def _set_up(self, mutable, testdir, num_clients=1, num_servers=10):
         self.mutable = mutable
@@ -80,35 +106,51 @@
         d.addCallback(_uploaded_immutable)
         return d
 
-    def _check_download(self):
+    def _start_download(self):
         n = self.c0.create_node_from_uri(self.uri)
         if self.mutable:
             d = n.download_best_version()
-            expected_plaintext = mutable_plaintext
+            self._stage_4_d = None
+        else:
+            d = download_to_data(n)
+            ciphertextdownloader = n._downloader._all_downloads.keys()[0] # too ugly! FIXME
+            self._stage_4_d = ciphertextdownloader._stage_4_d
+        return d
+
+    def _wait_for_data(self, n):
+        if self.mutable:
+            d = n.download_best_version()
         else:
             d = download_to_data(n)
-            expected_plaintext = immutable_plaintext
-        def _got_data(data):
-            self.failUnlessEqual(data, expected_plaintext)
-        d.addCallback(_got_data)
+        return d
+
+    def _check(self, resultingdata):
+        if self.mutable:
+            self.failUnlessEqual(resultingdata, mutable_plaintext)
+        else:
+            self.failUnlessEqual(resultingdata, immutable_plaintext)
+
+    def _download_and_check(self):
+        d = self._start_download()
+        d.addCallback(self._check)
         return d
 
     def _should_fail_download(self):
         if self.mutable:
             return self.shouldFail(UnrecoverableFileError, self.basedir,
                                    "no recoverable versions",
-                                   self._check_download)
+                                   self._download_and_check)
         else:
             return self.shouldFail(NotEnoughSharesError, self.basedir,
                                    "Failed to get enough shareholders",
-                                   self._check_download)
+                                   self._download_and_check)
 
 
     def test_10_good_sanity_check(self):
         d = defer.succeed(None)
         for mutable in [False, True]:
             d.addCallback(lambda ign: self._set_up(mutable, "test_10_good_sanity_check"))
-            d.addCallback(lambda ign: self._check_download())
+            d.addCallback(lambda ign: self._download_and_check())
         return d
 
     def test_10_good_copied_share(self):
@@ -116,7 +158,7 @@
         for mutable in [False, True]:
             d.addCallback(lambda ign: self._set_up(mutable, "test_10_good_copied_share"))
             d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[2:3], self.servers[0]))
-            d.addCallback(lambda ign: self._check_download())
+            d.addCallback(lambda ign: self._download_and_check())
         return d
 
     def test_3_good_7_noshares(self):
@@ -124,7 +166,7 @@
         for mutable in [False, True]:
             d.addCallback(lambda ign: self._set_up(mutable, "test_3_good_7_noshares"))
             d.addCallback(lambda ign: self._delete_all_shares_from(self.servers[3:]))
-            d.addCallback(lambda ign: self._check_download())
+            d.addCallback(lambda ign: self._download_and_check())
         return d
 
     def test_2_good_8_broken_fail(self):
@@ -149,7 +191,7 @@
             d.addCallback(lambda ign: self._set_up(mutable, "test_2_good_8_broken_copied_share"))
             d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[2:3], self.servers[0]))
             d.addCallback(lambda ign: self._break(self.servers[2:]))
-            d.addCallback(lambda ign: self._check_download())
+            d.addCallback(lambda ign: self._download_and_check())
         return d
 
     def test_2_good_8_broken_duplicate_share_fail(self):
@@ -168,7 +210,7 @@
         for mutable in [False]:
             d.addCallback(lambda ign: self._set_up(mutable, "test_3_good_7_hung"))
             d.addCallback(lambda ign: self._hang(self.servers[3:]))
-            d.addCallback(lambda ign: self._check_download())
+            d.addCallback(lambda ign: self._download_and_check())
         return d
 
     def test_2_good_8_hung_then_1_recovers(self):
@@ -179,17 +221,35 @@
             d.addCallback(lambda ign: self._hang(self.servers[2:3], until=recovered))
             d.addCallback(lambda ign: self._hang(self.servers[3:]))
             d.addCallback(lambda ign: reactor.callLater(5, recovered.callback, None))
-            d.addCallback(lambda ign: self._check_download())
+            d.addCallback(lambda ign: self._download_and_check())
         return d
 
     def test_2_good_8_hung_then_1_recovers_with_2_shares(self):
         d = defer.succeed(None)
         for mutable in [False]:
-            recovered = defer.Deferred()
+            #recovered = defer.Deferred()
             d.addCallback(lambda ign: self._set_up(mutable, "test_2_good_8_hung_then_1_recovers_with_2_shares"))
             d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[0:1], self.servers[2]))
-            d.addCallback(lambda ign: self._hang(self.servers[2:3], until=recovered))
+            #d.addCallback(lambda ign: self._hang(self.servers[2:3]))
             d.addCallback(lambda ign: self._hang(self.servers[3:]))
-            d.addCallback(lambda ign: reactor.callLater(5, recovered.callback, None))
-            d.addCallback(lambda ign: self._check_download())
+            #d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
+            d.addCallback(lambda ign: self._download_and_check())
         return d
+
+    def test_failover_during_stage_4(self):
+        # See #287
+        d = defer.succeed(None)
+        for mutable in [False]:
+            d.addCallback(lambda ign: self._set_up(mutable, "test_failover_during_stage_4"))
+            d.addCallback(lambda ign: self._corrupt_all_shares_in(self.servers[2:3], _corrupt_share_data))
+            d.addCallback(lambda ign: self._hang(self.servers[3:]))
+            def _start_download_and_unhang(ign):
+                # Returning the download Deferred straight from a callback
+                # would make the chain wait for the download to finish, so
+                # the unhang is hooked up here, before waiting on it.
+                d2 = self._start_download()
+                self._stage_4_d.addCallback(lambda ign: self._unhang(self.servers[3:4]))
+                d2.addCallback(self._check)
+                return d2
+            d.addCallback(_start_download_and_unhang)
+        return d