diff --git a/docs/frontends/webapi.rst b/docs/frontends/webapi.rst index ea6ada6..0d3b876 100644 --- a/docs/frontends/webapi.rst +++ b/docs/frontends/webapi.rst @@ -36,6 +36,7 @@ The Tahoe REST-ful Web API 8. `Static Files in /public_html`_ 9. `Safety and security issues -- names vs. URIs`_ 10. `Concurrency Issues`_ +11. `Access Blacklist`_ Enabling the web-API port ========================= @@ -1905,6 +1906,48 @@ For more details, please see the "Consistency vs Availability" and "The Prime Coordination Directive" sections of `mutable.rst <../specifications/mutable.rst>`_. +Access Blacklist +================ + +Gateway nodes may find it necessary to prohibit access to certain files. The +webapi has a facility to block access to filecaps by their storage index, +returning a 403 "Forbidden" error instead of the original file. + +This blacklist is recorded in $NODEDIR/access.blacklist, and contains one +blocked file per line. The first (space-separated) field on each line is the +storage-index, in the usual base32 format as displayed by the "More Info" +page, or by the "tahoe debug dump-cap" command. The second field is a reason +(no spaces allowed), which will be included in the 403 error message. This +could hold a URL to a page that explains why the file is blocked, for +example. 
+ +So for example, if you found a need to block access to a file with filecap +``URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861``, +you could do the following:: + + tahoe debug dump-cap URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861 + -> storage index: whpepioyrnff7orecjolvbudeu + echo "whpepioyrnff7orecjolvbudeu my-puppy-told-me-to" >>$NODEDIR/access.blacklist + tahoe restart $NODEDIR + tahoe get URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861 + -> error, 403 Access Prohibited: my-puppy-told-me-to + +If the ``access.blacklist`` file is present at node startup, each webapi +operation will check it for updates. So adding second, third, or additional +entries to the blacklist does not require a node restart. To avoid a +performance penalty for nodes that do not use the blacklist at all, this +check is not performed when the list did not exist at startup. So be sure to +restart the node after initially creating the blacklist, or it won't take +effect. + +The blacklist is applied to all access paths (including FTP, SFTP, and CLI +operations), not just the webapi. The blacklist also applies to directories. +If a directory is blacklisted, the gateway will refuse access to both that +directory and any child files/directories underneath it, when accessed via +"DIRCAP/SUBDIR/FILENAME" -style URLs. Users who go directly to the child +file/dir will bypass the blacklist. + + .. 
[1] URLs and HTTP and UTF-8, Oh My HTTP does not provide a mechanism to specify the character set used to
diff --git a/src/allmydata/blacklist.py b/src/allmydata/blacklist.py
new file mode 100755
index 0000000..539da6e
--- /dev/null
+++ b/src/allmydata/blacklist.py
@@ -0,0 +1,70 @@
+
+import os
+from allmydata.util import base32
+
+class FileProhibited(Exception):
+    """This client has been configured to prohibit access to this object.
+
+    The 'reason' attribute holds the operator-supplied explanation from the
+    blacklist file."""
+    def __init__(self, reason):
+        # pass the reason up so str(exc) and tracebacks are not empty
+        Exception.__init__(self, reason)
+        self.reason = reason
+
+
+class Blacklist:
+    """In-memory copy of a blacklist file, mapping base32 storage-index
+    strings to reasons. The file is re-read whenever its mtime changes."""
+
+    def __init__(self, blacklist_fn):
+        self.blacklist_fn = blacklist_fn
+        self.last_mtime = None
+        self.entries = {}
+        self.read_blacklist() # sets .last_mtime and .entries
+
+    def read_blacklist(self):
+        """Reload self.entries if the file changed since the last read.
+
+        Raises OSError if the file cannot be stat'ed, and ValueError (naming
+        the file and line) if an entry is malformed. On failure the
+        previously loaded entries stay in force and the next call retries."""
+        current_mtime = os.stat(self.blacklist_fn).st_mtime
+        # compare with != rather than >: a file replaced by an older copy
+        # (restored backup, 'mv') must be reloaded too
+        if self.last_mtime is not None and current_mtime == self.last_mtime:
+            return
+        entries = {}
+        f = open(self.blacklist_fn, "r")
+        try:
+            for lineno, line in enumerate(f):
+                fields = line.split()
+                if not fields:
+                    continue # tolerate blank lines
+                if len(fields) < 2:
+                    raise ValueError("%s line %d: expected 'STORAGEINDEX "
+                                     "REASON'" % (self.blacklist_fn, lineno+1))
+                si_s, reason = fields[:2]
+                # an explicit check, since 'assert' is stripped under -O
+                if not base32.a2b(si_s):
+                    raise ValueError("%s line %d: bad storage index %r"
+                                     % (self.blacklist_fn, lineno+1, si_s))
+                entries[si_s] = reason
+        finally:
+            f.close()
+        # swap in the new table only after the whole file parsed cleanly
+        self.entries = entries
+        self.last_mtime = current_mtime
+
+    def check_storageindex(self, si):
+        """Raise FileProhibited if 'si' (a binary storage index) is listed.
+
+        A None storage index is never blocked."""
+        if si is None:
+            # NOTE(review): presumably what nodes without a storage index
+            # return from get_storage_index(); b2a(None) would blow up
+            return
+        self.read_blacklist()
+        reason = self.entries.get(base32.b2a(si), None)
+        if reason is not None:
+            raise FileProhibited(reason)
diff --git a/src/allmydata/client.py b/src/allmydata/client.py index fb7e0c8..d395284 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -24,6 +24,7 @@ from allmydata.stats import StatsProvider from allmydata.history import History from allmydata.interfaces import IStatsProducer, RIStubClient from allmydata.nodemaker import NodeMaker +from allmydata.blacklist import Blacklist KiB=1024 @@ -293,6 +294,7 @@ class Client(node.Node, pollmixin.PollMixin): self.terminator.setServiceParent(self) self.add_service(Uploader(helper_furl, self.stats_provider)) self.init_stub_client() + self.init_blacklist() self.init_nodemaker() def init_client_storage_broker(self): @@ -345,6 +347,12 @@ class Client(node.Node, pollmixin.PollMixin): d.addErrback(log.err,
facility="tahoe.init", level=log.BAD, umid="OEHq3g") + def init_blacklist(self): + self.blacklist = None + fn = os.path.join(self.basedir, "access.blacklist") + if os.path.exists(fn): + self.blacklist = Blacklist(fn) + def init_nodemaker(self): self.nodemaker = NodeMaker(self.storage_broker, self._secret_holder, @@ -352,7 +360,8 @@ class Client(node.Node, pollmixin.PollMixin): self.getServiceNamed("uploader"), self.terminator, self.get_encoding_parameters(), - self._key_generator) + self._key_generator, + self.blacklist) def get_history(self): return self.history @@ -478,11 +487,15 @@ class Client(node.Node, pollmixin.PollMixin): # dirnodes. The first takes a URI and produces a filenode or (new-style) # dirnode. The other three create brand-new filenodes/dirnodes. - def create_node_from_uri(self, write_uri, read_uri=None, deep_immutable=False, name=""): + def create_node_from_uri(self, write_uri, read_uri=None, + deep_immutable=False, name=""): # This returns synchronously. - # Note that it does *not* validate the write_uri and read_uri; instead we - # may get an opaque node if there were any problems. - return self.nodemaker.create_from_cap(write_uri, read_uri, deep_immutable=deep_immutable, name=name) + # Note that it does *not* validate the write_uri and read_uri; + # instead we may get an opaque node if there were any problems. 
+ n = self.nodemaker.create_from_cap(write_uri, read_uri, + deep_immutable=deep_immutable, + name=name) + return n def create_dirnode(self, initial_children={}): d = self.nodemaker.create_new_mutable_directory(initial_children) diff --git a/src/allmydata/nodemaker.py b/src/allmydata/nodemaker.py index 3b74d90..0aa70d5 100644 --- a/src/allmydata/nodemaker.py +++ b/src/allmydata/nodemaker.py @@ -14,7 +14,8 @@ class NodeMaker: def __init__(self, storage_broker, secret_holder, history, uploader, terminator, - default_encoding_parameters, key_generator): + default_encoding_parameters, key_generator, + blacklist=None): self.storage_broker = storage_broker self.secret_holder = secret_holder self.history = history @@ -22,6 +23,7 @@ class NodeMaker: self.terminator = terminator self.default_encoding_parameters = default_encoding_parameters self.key_generator = key_generator + self.blacklist = blacklist self._node_cache = weakref.WeakValueDictionary() # uri -> node @@ -60,14 +62,20 @@ class NodeMaker: else: memokey = "M" + bigcap if memokey in self._node_cache: - return self._node_cache[memokey] - cap = uri.from_string(bigcap, deep_immutable=deep_immutable, name=name) - node = self._create_from_single_cap(cap) - if node: - self._node_cache[memokey] = node # note: WeakValueDictionary + node = self._node_cache[memokey] else: - # don't cache UnknownNode - node = UnknownNode(writecap, readcap, deep_immutable=deep_immutable, name=name) + cap = uri.from_string(bigcap, deep_immutable=deep_immutable, + name=name) + node = self._create_from_single_cap(cap) + if node: + self._node_cache[memokey] = node # note: WeakValueDictionary + else: + # don't cache UnknownNode + node = UnknownNode(writecap, readcap, + deep_immutable=deep_immutable, name=name) + if self.blacklist: + si = node.get_storage_index() + self.blacklist.check_storageindex(si) # may raise FileProhibited return node def _create_from_single_cap(self, cap): diff --git a/src/allmydata/test/no_network.py 
b/src/allmydata/test/no_network.py index 42a90dd..c07a822 100644 --- a/src/allmydata/test/no_network.py +++ b/src/allmydata/test/no_network.py @@ -206,6 +206,7 @@ class NoNetworkGrid(service.MultiService): self.basedir = basedir fileutil.make_dirs(basedir) + self._client_config_hooks = client_config_hooks self.servers_by_number = {} # maps to StorageServer instance self.wrappers_by_id = {} # maps to wrapped StorageServer instance self.proxies_by_id = {} # maps to IServer on which .rref is a wrapped @@ -229,20 +230,24 @@ class NoNetworkGrid(service.MultiService): f.write("[storage]\n") f.write("enabled = false\n") f.close() - c = None - if i in client_config_hooks: - # this hook can either modify tahoe.cfg, or return an - # entirely new Client instance - c = client_config_hooks[i](clientdir) - if not c: - c = NoNetworkClient(clientdir) - c.set_default_mutable_keysize(522) - c.nodeid = clientid - c.short_nodeid = b32encode(clientid).lower()[:8] - c._servers = self.all_servers # can be updated later - c.setServiceParent(self) + c = self._create_client(i, clientdir, clientid) self.clients.append(c) + def _create_client(self, i, clientdir, clientid): + c = None + if i in self._client_config_hooks: + # this hook can either modify tahoe.cfg, or return an + # entirely new Client instance + c = self._client_config_hooks[i](clientdir) + if not c: + c = NoNetworkClient(clientdir) + c.set_default_mutable_keysize(522) + c.nodeid = clientid + c.short_nodeid = b32encode(clientid).lower()[:8] + c._servers = self.all_servers # can be updated later + c.setServiceParent(self) + return c + def make_server(self, i, readonly=False): serverid = hashutil.tagged_hash("serverid", str(i))[:20] serverdir = os.path.join(self.basedir, "servers", @@ -274,6 +279,17 @@ class NoNetworkGrid(service.MultiService): for c in self.clients: c._servers = self.all_servers + def restart_client(self, i): + # we must remove the client, then build a new one with the same id + # and basedir + old_client = 
self.clients[i] + d = defer.maybeDeferred(old_client.disownServiceParent) + def _then(ign): + c = self._create_client(i, old_client.basedir, old_client.nodeid) + self.clients[i] = c + d.addCallback(_then) + return d + def remove_server(self, serverid): # it's enough to remove the server from c._servers (we don't actually # have to detach and stopService it) @@ -336,6 +352,15 @@ class GridTestMixin: ss = self.g.servers_by_number[i] yield (i, ss, ss.storedir) + def restart_client(self, i=0): + d = self.g.restart_client(i) + def _then(ign): + c = self.g.clients[i] + self.client_webports[i] = c.getServiceNamed("webish").getPortnum() + self.client_baseurls[i] = c.getServiceNamed("webish").getURL() + d.addCallback(_then) + return d + def find_uri_shares(self, uri): si = tahoe_uri.from_string(uri).get_storage_index() prefixdir = storage_index_to_dir(si) diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py index 951f554..7b23c5a 100644 --- a/src/allmydata/test/test_web.py +++ b/src/allmydata/test/test_web.py @@ -154,6 +154,7 @@ class FakeClient(Client): self.history = FakeHistory() self.uploader = FakeUploader() self.uploader.setServiceParent(self) + self.blacklist = None self.nodemaker = FakeNodeMaker(None, self._secret_holder, None, self.uploader, None, None, None) @@ -4431,6 +4432,93 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi return d + def test_blacklist(self): + # download from a blacklisted URI, get an error + self.basedir = "web/Grid/blacklist" + self.set_up_grid() + c0 = self.g.clients[0] + c0_basedir = c0.basedir + fn = os.path.join(c0_basedir, "access.blacklist") + self.uris = {} + DATA = "off-limits " * 50 + d = c0.upload(upload.Data(DATA, convergence="")) + def _stash_uri(ur): + self.uri = ur.uri + self.url = "uri/"+self.uri + u = uri.from_string_filenode(self.uri) + self.si = u.get_storage_index() + d.addCallback(_stash_uri) + d.addCallback(lambda ign: self.GET(self.url)) + def 
_blacklist_but_dont_restart(ign): + f = open(fn, "w") + f.write("%s %s\n" % (base32.b2a(self.si), "off-limits")) + f.close() + # now we *don't* restart the client: since no blacklist was seen + # at startup, the change is ignored. This confirms that the node + # behaves as the docs specify, even if that behavior might not be + # what you always want. (the intention is to avoid a performance + # penalty for the majority of nodes that don't use a blacklist). + d.addCallback(_blacklist_but_dont_restart) + d.addCallback(lambda ign: self.GET(self.url)) + d.addCallback(lambda ign: self.restart_client(0)) # c0 now invalid + # now the blacklist should be active + d.addCallback(lambda ign: + self.shouldHTTPError("_get_from_blacklisted_uri", + 403, "Forbidden", + "Access Prohibited: off-limits", + self.GET, "uri/" + self.uri)) + def _unblacklist(ign): + open(fn, "w").close() + # the Blacklist object watches mtime to tell when the file has + # changed, but on windows this test will run faster than the + # filesystem's mtime resolution. So we edit Blacklist.last_mtime + # to force a reload. 
+ self.g.clients[0].blacklist.last_mtime -= 2.0 + d.addCallback(_unblacklist) + # now a read should work + d.addCallback(lambda ign: self.GET(self.url)) + # read again to exercise the blacklist-is-unchanged logic + d.addCallback(lambda ign: self.GET(self.url)) + + # now add a blacklisted directory, and make sure files under it are + # refused too + def _add_dir(ign): + childnode = c0.create_node_from_uri(self.uri, None) + return c0.create_dirnode({u"child": (childnode,{}) }) + d.addCallback(_add_dir) + def _get_dircap(dn): + self.dir_si_b32 = base32.b2a(dn.get_storage_index()) + self.dir_url_rw = "uri/"+dn.get_write_uri()+"/?t=json" + self.dir_url_ro = "uri/"+dn.get_readonly_uri()+"/?t=json" + self.child_url = "uri/"+dn.get_readonly_uri()+"/child" + d.addCallback(_get_dircap) + d.addCallback(lambda ign: self.GET(self.dir_url_rw)) + d.addCallback(lambda ign: self.GET(self.dir_url_ro)) + d.addCallback(lambda ign: self.GET(self.child_url))
+        def _block_dir(ign):
+            f = open(fn, "w")
+            f.write("%s %s\n" % (self.dir_si_b32, "dir-off-limits")) # the dir's SI, not the file's
+            f.close()
+            self.g.clients[0].blacklist.last_mtime -= 2.0
+ d.addCallback(_block_dir) + d.addCallback(lambda ign: + self.shouldHTTPError("_get_from_blacklisted_uri 2", + 403, "Forbidden", + "Access Prohibited: dir-off-limits", + self.GET, self.dir_url_rw)) + d.addCallback(lambda ign: + self.shouldHTTPError("_get_from_blacklisted_uri 3", + 403, "Forbidden", + "Access Prohibited: dir-off-limits", + self.GET, self.dir_url_ro)) + d.addCallback(lambda ign: + self.shouldHTTPError("_get_from_blacklisted_uri 4", + 403, "Forbidden", + "Access Prohibited: dir-off-limits", + self.GET, self.child_url)) + + return d + class CompletelyUnhandledError(Exception): pass class ErrorBoom(rend.Page): diff --git a/src/allmydata/web/common.py b/src/allmydata/web/common.py index 22083a5..07ea487 100644 --- a/src/allmydata/web/common.py +++ b/src/allmydata/web/common.py @@ -6,6 +6,7 @@ from zope.interface import Interface from nevow import loaders,
appserver from nevow.inevow import IRequest from nevow.util import resource_filename +from allmydata import blacklist from allmydata.interfaces import ExistingChildError, NoSuchChildError, \ FileTooLargeError, NotEnoughSharesError, NoSharesError, \ EmptyPathnameComponentError, MustBeDeepImmutableError, \ @@ -231,6 +232,9 @@ def humanize_failure(f): "The cap is being passed in a read slot (ro_uri), or was retrieved " "from a read slot as an unknown cap.") % quoted_name return (t, http.BAD_REQUEST) + if f.check(blacklist.FileProhibited): + t = "Access Prohibited: %s" % f.value.reason + return (t, http.FORBIDDEN) if f.check(WebError): return (f.value.text, f.value.code) if f.check(FileTooLargeError): diff --git a/src/allmydata/webish.py b/src/allmydata/webish.py index 03ca3ba..a8e0bff 100644 --- a/src/allmydata/webish.py +++ b/src/allmydata/webish.py @@ -129,7 +129,7 @@ class WebishServer(service.MultiService): name = "webish" def __init__(self, client, webport, nodeurl_path=None, staticdir=None, - clock=None): + clock=None): service.MultiService.__init__(self) # the 'data' argument to all render() methods default to the Client # the 'clock' argument to root.Root is, if set, a