Patch for ticket #928 (late buckets): adds a test-only Deferred to the
immutable downloader that fires when it starts downloading shares, replaces
the 'until' argument of the no-network hang_server() with an explicit
unhang_server(), and reworks the hung-server download tests accordingly.

diff -rN -u old-#928-late-buckets/src/allmydata/immutable/download.py new-#928-late-buckets/src/allmydata/immutable/download.py
--- old-#928-late-buckets/src/allmydata/immutable/download.py	2010-01-31 18:50:17.000000000 -0700
+++ new-#928-late-buckets/src/allmydata/immutable/download.py	2010-01-31 18:50:19.000000000 -0700
@@ -811,6 +811,10 @@
         # self._responses_received = 0
         # self._queries_failed = 0
 
+        # This is solely for the use of unit tests. It will be triggered when
+        # we start downloading shares.
+        self._stage_4_d = defer.Deferred()
+
     def pauseProducing(self):
         if self._paused:
             return
@@ -1119,6 +1123,8 @@
             # this pause, at the end of write, prevents pre-fetch from
             # happening until the consumer is ready for more data.
             d.addCallback(self._check_for_pause)
+
+        self._stage_4_d.callback(None)
         return d
 
     def _check_for_pause(self, res):
diff -rN -u old-#928-late-buckets/src/allmydata/test/no_network.py new-#928-late-buckets/src/allmydata/test/no_network.py
--- old-#928-late-buckets/src/allmydata/test/no_network.py	2010-01-31 18:50:17.000000000 -0700
+++ new-#928-late-buckets/src/allmydata/test/no_network.py	2010-01-31 18:50:19.000000000 -0700
@@ -252,12 +252,17 @@
 
     def break_server(self, serverid):
         # mark the given server as broken, so it will throw exceptions when
-        # asked to hold a share
+        # asked to hold a share or serve a share
         self.servers_by_id[serverid].broken = True
 
-    def hang_server(self, serverid, until=defer.Deferred()):
-        # hang the given server until 'until' fires
-        self.servers_by_id[serverid].hung_until = until
+    def hang_server(self, serverid):
+        # hang the given server
+        assert self.servers_by_id[serverid].hung_until is None
+        self.servers_by_id[serverid].hung_until = defer.Deferred()
+
+    def unhang_server(self, serverid):
+        # unhang the given server
+        self.servers_by_id[serverid].hung_until.callback(None)
 
 
 class GridTestMixin:
diff -rN -u old-#928-late-buckets/src/allmydata/test/test_hung_server.py new-#928-late-buckets/src/allmydata/test/test_hung_server.py
--- old-#928-late-buckets/src/allmydata/test/test_hung_server.py	2010-01-31 18:50:17.000000000 -0700
+++ new-#928-late-buckets/src/allmydata/test/test_hung_server.py	2010-01-31 18:50:19.000000000 -0700
@@ -9,6 +9,7 @@
 from allmydata.storage.common import storage_index_to_dir
 from allmydata.test.no_network import GridTestMixin
 from allmydata.test.common import ShouldFailMixin
+from allmydata.test.common_util import StallMixin
 from allmydata.interfaces import NotEnoughSharesError
 
 immutable_plaintext = "data" * 10000
@@ -22,9 +23,13 @@
             self.g.break_server(id)
 
     def _hang(self, servers, **kwargs):
         for (id, ss) in servers:
             self.g.hang_server(id, **kwargs)
 
+    def _unhang(self, servers):
+        for (id, ss) in servers:
+            self.g.unhang_server(id)
+
     def _delete_all_shares_from(self, servers):
         serverids = [id for (id, ss) in servers]
         for (i_shnum, i_serverid, i_sharefile) in self.shares:
@@ -80,35 +85,58 @@
             d.addCallback(_uploaded_immutable)
         return d
 
-    def _check_download(self):
+    def _start_download(self):
+        # Start downloading self.uri. Returns (d, stage_4_d): d is the
+        # Deferred returned by the download call, and stage_4_d is the
+        # immutable downloader's test-only Deferred (None for mutable files).
         n = self.c0.create_node_from_uri(self.uri)
         if self.mutable:
             d = n.download_best_version()
-            expected_plaintext = mutable_plaintext
+            # _stage_4_d is only added to the immutable downloader
+            stage_4_d = None
         else:
             d = download_to_data(n)
-            expected_plaintext = immutable_plaintext
-        def _got_data(data):
-            self.failUnlessEqual(data, expected_plaintext)
-        d.addCallback(_got_data)
+            ciphertextdownloader = n._downloader._all_downloads.keys()[0]
+            stage_4_d = ciphertextdownloader._stage_4_d
+        return (d, stage_4_d)
+
+    def _wait_for_data(self, n):
+        # Download from the given node and return the resulting Deferred.
+        if self.mutable:
+            d = n.download_best_version()
+        else:
+            d = download_to_data(n)
+        return d
+
+    def _check(self, resultingdata):
+        # Fail the test unless resultingdata equals the expected plaintext.
+        if self.mutable:
+            self.failUnlessEqual(resultingdata, mutable_plaintext)
+        else:
+            self.failUnlessEqual(resultingdata, immutable_plaintext)
+
+    def _download_and_check(self):
+        # Download self.uri and check the result with _check().
+        d, stage4d = self._start_download()
+        d.addCallback(self._check)
         return d
 
     def _should_fail_download(self):
         if self.mutable:
             return self.shouldFail(UnrecoverableFileError, self.basedir,
                                    "no recoverable versions",
-                                   self._check_download)
+                                   self._download_and_check)
         else:
             return self.shouldFail(NotEnoughSharesError, self.basedir,
                                    "Failed to get enough shareholders",
-                                   self._check_download)
+                                   self._download_and_check)
 
 
     def test_10_good_sanity_check(self):
         d = defer.succeed(None)
         for mutable in [False, True]:
             d.addCallback(lambda ign: self._set_up(mutable, "test_10_good_sanity_check"))
-            d.addCallback(lambda ign: self._check_download())
+            d.addCallback(lambda ign: self._download_and_check())
         return d
 
     def test_10_good_copied_share(self):
@@ -116,7 +144,7 @@
         for mutable in [False, True]:
             d.addCallback(lambda ign: self._set_up(mutable, "test_10_good_copied_share"))
             d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[2:3], self.servers[0]))
-            d.addCallback(lambda ign: self._check_download())
+            d.addCallback(lambda ign: self._download_and_check())
         return d
 
     def test_3_good_7_noshares(self):
@@ -124,7 +152,7 @@
         for mutable in [False, True]:
             d.addCallback(lambda ign: self._set_up(mutable, "test_3_good_7_noshares"))
             d.addCallback(lambda ign: self._delete_all_shares_from(self.servers[3:]))
-            d.addCallback(lambda ign: self._check_download())
+            d.addCallback(lambda ign: self._download_and_check())
         return d
 
     def test_2_good_8_broken_fail(self):
@@ -149,7 +177,7 @@
             d.addCallback(lambda ign: self._set_up(mutable, "test_2_good_8_broken_copied_share"))
             d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[2:3], self.servers[0]))
             d.addCallback(lambda ign: self._break(self.servers[2:]))
-            d.addCallback(lambda ign: self._check_download())
+            d.addCallback(lambda ign: self._download_and_check())
         return d
 
     def test_2_good_8_broken_duplicate_share_fail(self):
@@ -168,7 +196,7 @@
         for mutable in [False]:
             d.addCallback(lambda ign: self._set_up(mutable, "test_3_good_7_hung"))
             d.addCallback(lambda ign: self._hang(self.servers[3:]))
-            d.addCallback(lambda ign: self._check_download())
+            d.addCallback(lambda ign: self._download_and_check())
         return d
 
     def test_2_good_8_hung_then_1_recovers(self):
@@ -176,10 +204,12 @@
         for mutable in [False]:
             recovered = defer.Deferred()
             d.addCallback(lambda ign: self._set_up(mutable, "test_2_good_8_hung_then_1_recovers"))
-            d.addCallback(lambda ign: self._hang(self.servers[2:3], until=recovered))
+            d.addCallback(lambda ign: self._hang(self.servers[2:3]))
             d.addCallback(lambda ign: self._hang(self.servers[3:]))
-            d.addCallback(lambda ign: reactor.callLater(5, recovered.callback, None))
-            d.addCallback(lambda ign: self._check_download())
+            # hang_server() no longer takes an 'until' Deferred, so schedule
+            # the recovery of server 2 through unhang_server() instead.
+            d.addCallback(lambda ign: reactor.callLater(5, self._unhang, self.servers[2:3]))
+            d.addCallback(lambda ign: self._download_and_check())
         return d
 
     def test_2_good_8_hung_then_1_recovers_with_2_shares(self):
@@ -188,8 +218,33 @@
             recovered = defer.Deferred()
             d.addCallback(lambda ign: self._set_up(mutable, "test_2_good_8_hung_then_1_recovers_with_2_shares"))
             d.addCallback(lambda ign: self._copy_all_shares_from(self.servers[0:1], self.servers[2]))
-            d.addCallback(lambda ign: self._hang(self.servers[2:3], until=recovered))
+            d.addCallback(lambda ign: self._hang(self.servers[2:3]))
             d.addCallback(lambda ign: self._hang(self.servers[3:]))
-            d.addCallback(lambda ign: reactor.callLater(5, recovered.callback, None))
-            d.addCallback(lambda ign: self._check_download())
+            d.addCallback(lambda ign: self._unhang(self.servers[2:3]))
+            d.addCallback(lambda ign: self._download_and_check())
+        return d
+
+    def test_failover_during_stage_4(self):
+        # See #287
+        d = defer.succeed(None)
+        for mutable in [False]:
+            d.addCallback(lambda ign: self._set_up(mutable, "test_failover_during_stage_4"))
+            d.addCallback(lambda ign: self._hang(self.servers[3:]))
+            d.addCallback(lambda ign: self._start_download())
+            def _after_starting_download(res):
+                # _start_download() returns a single (doned, started4d) tuple
+                (doned, started4d) = res
+                def _after_stage4_starts(ign):
+                    # Now unhang server3
+                    self._unhang(self.servers[3:4])
+                    # and corrupt a share in server 2
+                    # NOTE(review): _corrupt_share_in is not defined in this
+                    # patch -- confirm it exists before relying on this test.
+                    self._corrupt_share_in(self.servers[2])
+                started4d.addCallback(_after_stage4_starts)
+                # check the downloaded data once the download has finished
+                doned.addCallback(self._check)
+                return doned
+            d.addCallback(_after_starting_download)
+
         return d