Sun May 16 23:43:37 CEST 2010 Francois Deppierraz * Fix handling of correctly encoded unicode filenames (#534) Tahoe CLI commands working on local files, for instance 'tahoe cp' or 'tahoe backup', have been improved to correctly handle filenames containing non-ASCII characters. In the case where Tahoe encounters a filename which cannot be decoded using the system encoding, an error will be returned and the operation will fail. Under Linux, this typically happens when the filesystem contains filenames encoded with an encoding (for instance latin1) other than that of the system locale (for instance UTF-8). In such a case, you'll need to fix your system with tools such as 'convmv' before using Tahoe CLI. All CLI commands have been improved to support non-ASCII parameters such as filenames and aliases on all supported Operating Systems except Windows as of now. ***END OF DESCRIPTION*** Place the long patch description above the ***END OF DESCRIPTION*** marker. The first line of this file will be the patch name. This patch contains the following changes: diff -rN -u old-tahoe-534/docs/frontends/CLI.txt new-tahoe-534/docs/frontends/CLI.txt --- old-tahoe-534/docs/frontends/CLI.txt 2010-05-17 09:57:49.323906328 +0200 +++ new-tahoe-534/docs/frontends/CLI.txt 2010-05-17 09:57:49.396409282 +0200 @@ -123,13 +123,13 @@ perspective on the graph of files and directories. Each tahoe node remembers a list of starting points, named "aliases", -in a file named ~/.tahoe/private/aliases . These aliases are short -strings that stand in for a directory read- or write- cap. If you use -the command line "ls" without any "[STARTING_DIR]:" argument, then it -will use the default alias, which is "tahoe", therefore "tahoe ls" has -the same effect as "tahoe ls tahoe:". The same goes for the other -commands which can reasonably use a default alias: get, put, mkdir, -mv, and rm. +in a file named ~/.tahoe/private/aliases . 
These aliases are short UTF-8 +encoded strings that stand in for a directory read- or write- cap. If +you use the command line "ls" without any "[STARTING_DIR]:" argument, +then it will use the default alias, which is "tahoe", therefore "tahoe +ls" has the same effect as "tahoe ls tahoe:". The same goes for the +other commands which can reasonably use a default alias: get, put, +mkdir, mv, and rm. For backwards compatibility with Tahoe-1.0, if the "tahoe": alias is not found in ~/.tahoe/private/aliases, the CLI will use the contents of diff -rN -u old-tahoe-534/NEWS new-tahoe-534/NEWS --- old-tahoe-534/NEWS 2010-05-17 09:57:49.323906328 +0200 +++ new-tahoe-534/NEWS 2010-05-17 09:57:49.336410287 +0200 @@ -1,5 +1,26 @@ User visible changes in Tahoe-LAFS. -*- outline -*- +* Release 1.7.0 + +** Bugfixes + +*** Unicode filenames handling + +Tahoe CLI commands working on local files, for instance 'tahoe cp' or 'tahoe +backup', have been improved to correctly handle filenames containing non-ASCII +characters. + +In the case where Tahoe encounters a filename which cannot be decoded using the +system encoding, an error will be returned and the operation will fail. Under +Linux, this typically happens when the filesystem contains filenames encoded +with another encoding, for instance latin1, than the system locale, for +instance UTF-8. In such case, you'll need to fix your system with tools such +as 'convmv' before using Tahoe CLI. + +All CLI commands have been improved to support non-ASCII parameters such as +filenames and aliases on all supported Operating Systems except Windows as of +now. 
+ * Release 1.6.1 (2010-02-27) ** Bugfixes diff -rN -u old-tahoe-534/src/allmydata/scripts/cli.py new-tahoe-534/src/allmydata/scripts/cli.py --- old-tahoe-534/src/allmydata/scripts/cli.py 2010-05-17 09:57:49.323906328 +0200 +++ new-tahoe-534/src/allmydata/scripts/cli.py 2010-05-17 09:57:49.636405126 +0200 @@ -1,6 +1,7 @@ import os.path, re, sys, fnmatch from twisted.python import usage from allmydata.scripts.common import BaseOptions, get_aliases +from allmydata.util.stringutils import argv_to_unicode NODEURL_RE=re.compile("http://([^:]*)(:([1-9][0-9]*))?") @@ -49,12 +50,12 @@ class MakeDirectoryOptions(VDriveOptions): def parseArgs(self, where=""): - self.where = where + self.where = argv_to_unicode(where) longdesc = """Create a new directory, either unlinked or as a subdirectory.""" class AddAliasOptions(VDriveOptions): def parseArgs(self, alias, cap): - self.alias = alias + self.alias = argv_to_unicode(alias) self.cap = cap def getSynopsis(self): @@ -64,7 +65,7 @@ class CreateAliasOptions(VDriveOptions): def parseArgs(self, alias): - self.alias = alias + self.alias = argv_to_unicode(alias) def getSynopsis(self): return "%s create-alias ALIAS" % (os.path.basename(sys.argv[0]),) @@ -83,7 +84,7 @@ ("json", None, "Show the raw JSON output"), ] def parseArgs(self, where=""): - self.where = where + self.where = argv_to_unicode(where) longdesc = """ List the contents of some portion of the grid. 
@@ -118,8 +119,13 @@ # tahoe get FOO bar # write to local file # tahoe get tahoe:FOO bar # same - self.from_file = arg1 - self.to_file = arg2 + self.from_file = argv_to_unicode(arg1) + + if arg2: + self.to_file = argv_to_unicode(arg2) + else: + self.to_file = None + if self.to_file == "-": self.to_file = None @@ -151,15 +157,15 @@ # see Examples below if arg1 is not None and arg2 is not None: - self.from_file = arg1 - self.to_file = arg2 + self.from_file = argv_to_unicode(arg1) + self.to_file = argv_to_unicode(arg2) elif arg1 is not None and arg2 is None: - self.from_file = arg1 # might be "-" + self.from_file = argv_to_unicode(arg1) # might be "-" self.to_file = None else: self.from_file = None self.to_file = None - if self.from_file == "-": + if self.from_file == u"-": self.from_file = None def getSynopsis(self): @@ -197,8 +203,8 @@ def parseArgs(self, *args): if len(args) < 2: raise usage.UsageError("cp requires at least two arguments") - self.sources = args[:-1] - self.destination = args[-1] + self.sources = map(argv_to_unicode, args[:-1]) + self.destination = argv_to_unicode(args[-1]) def getSynopsis(self): return "Usage: tahoe [options] cp FROM.. 
TO" longdesc = """ @@ -228,15 +234,15 @@ class RmOptions(VDriveOptions): def parseArgs(self, where): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s rm REMOTE_FILE" % (os.path.basename(sys.argv[0]),) class MvOptions(VDriveOptions): def parseArgs(self, frompath, topath): - self.from_file = frompath - self.to_file = topath + self.from_file = argv_to_unicode(frompath) + self.to_file = argv_to_unicode(topath) def getSynopsis(self): return "%s mv FROM TO" % (os.path.basename(sys.argv[0]),) @@ -254,8 +260,8 @@ class LnOptions(VDriveOptions): def parseArgs(self, frompath, topath): - self.from_file = frompath - self.to_file = topath + self.from_file = argv_to_unicode(frompath) + self.to_file = argv_to_unicode(topath) def getSynopsis(self): return "%s ln FROM TO" % (os.path.basename(sys.argv[0]),) @@ -279,8 +285,8 @@ self['exclude'] = set() def parseArgs(self, localdir, topath): - self.from_dir = localdir - self.to_dir = topath + self.from_dir = argv_to_unicode(localdir) + self.to_dir = argv_to_unicode(topath) def getSynopsis(Self): return "%s backup FROM ALIAS:TO" % os.path.basename(sys.argv[0]) @@ -334,7 +340,7 @@ class WebopenOptions(VDriveOptions): def parseArgs(self, where=''): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s webopen [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) @@ -350,7 +356,7 @@ ("raw", "r", "Display raw JSON data instead of parsed"), ] def parseArgs(self, where=''): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s manifest [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) @@ -363,7 +369,7 @@ ("raw", "r", "Display raw JSON data instead of parsed"), ] def parseArgs(self, where=''): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s stats [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) @@ -379,7 +385,7 @@ ("add-lease", None, "Add/renew lease on all shares"), ] def 
parseArgs(self, where=''): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) @@ -398,7 +404,7 @@ ("verbose", "v", "Be noisy about what is happening."), ] def parseArgs(self, where=''): - self.where = where + self.where = argv_to_unicode(where) def getSynopsis(self): return "%s deep-check [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) diff -rN -u old-tahoe-534/src/allmydata/scripts/common.py new-tahoe-534/src/allmydata/scripts/common.py --- old-tahoe-534/src/allmydata/scripts/common.py 2010-05-17 09:57:49.313905408 +0200 +++ new-tahoe-534/src/allmydata/scripts/common.py 2010-05-17 09:57:49.636405126 +0200 @@ -1,7 +1,9 @@ import os, sys, urllib +import codecs from twisted.python import usage - +from allmydata.util.stringutils import unicode_to_url +from allmydata.util.assertutil import precondition class BaseOptions: # unit tests can override these to point at StringIO instances @@ -100,14 +102,14 @@ except EnvironmentError: pass try: - f = open(aliasfile, "r") + f = codecs.open(aliasfile, "r", "utf-8") for line in f.readlines(): line = line.strip() if line.startswith("#") or not line: continue name, cap = line.split(":", 1) # normalize it: remove http: prefix, urldecode - cap = cap.strip() + cap = cap.strip().encode('ascii') aliases[name] = uri.from_string_dirnode(cap).to_string() except EnvironmentError: pass @@ -138,7 +140,7 @@ # and default is not found in aliases, an UnknownAliasError is # raised. path = path.strip() - if uri.has_uri_prefix(path): + if uri.has_uri_prefix(path.encode('ascii', 'ignore')): # We used to require "URI:blah:./foo" in order to get a subpath, # stripping out the ":./" sequence. We still allow that for compatibility, # but now also allow just "URI:blah/foo". 
@@ -180,4 +182,4 @@ def escape_path(path): segments = path.split("/") - return "/".join([urllib.quote(s) for s in segments]) + return "/".join([urllib.quote(unicode_to_url(s)) for s in segments]) diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_add_alias.py new-tahoe-534/src/allmydata/scripts/tahoe_add_alias.py --- old-tahoe-534/src/allmydata/scripts/tahoe_add_alias.py 2010-05-17 09:57:49.313905408 +0200 +++ new-tahoe-534/src/allmydata/scripts/tahoe_add_alias.py 2010-05-17 09:57:49.646405047 +0200 @@ -1,16 +1,20 @@ import os.path +import codecs +import sys from allmydata import uri from allmydata.scripts.common_http import do_http, check_http_error from allmydata.scripts.common import get_aliases from allmydata.util.fileutil import move_into_place +from allmydata.util.stringutils import unicode_to_stdout + def add_line_to_aliasfile(aliasfile, alias, cap): # we use os.path.exists, rather than catching EnvironmentError, to avoid # clobbering the valuable alias file in case of spurious or transient # filesystem errors. 
if os.path.exists(aliasfile): - f = open(aliasfile, "r") + f = codecs.open(aliasfile, "r", "utf-8") aliases = f.read() f.close() if not aliases.endswith("\n"): @@ -18,7 +22,7 @@ else: aliases = "" aliases += "%s: %s\n" % (alias, cap) - f = open(aliasfile+".tmp", "w") + f = codecs.open(aliasfile+".tmp", "w", "utf-8") f.write(aliases) f.close() move_into_place(aliasfile+".tmp", aliasfile) @@ -41,7 +45,7 @@ add_line_to_aliasfile(aliasfile, alias, cap) - print >>stdout, "Alias '%s' added" % (alias,) + print >>stdout, "Alias '%s' added" % (unicode_to_stdout(alias),) return 0 def create_alias(options): @@ -74,7 +78,7 @@ add_line_to_aliasfile(aliasfile, alias, new_uri) - print >>stdout, "Alias '%s' created" % (alias,) + print >>stdout, "Alias '%s' created" % (unicode_to_stdout(alias),) return 0 def list_aliases(options): diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_backup.py new-tahoe-534/src/allmydata/scripts/tahoe_backup.py --- old-tahoe-534/src/allmydata/scripts/tahoe_backup.py 2010-05-17 09:57:49.313905408 +0200 +++ new-tahoe-534/src/allmydata/scripts/tahoe_backup.py 2010-05-17 09:57:49.646405047 +0200 @@ -9,6 +9,11 @@ from allmydata.scripts.common_http import do_http from allmydata.util import time_format from allmydata.scripts import backupdb +import sys +from allmydata.util.stringutils import unicode_to_stdout, listdir_unicode, open_unicode +from allmydata.util.assertutil import precondition +from twisted.python import usage + class HTTPError(Exception): pass @@ -154,12 +159,16 @@ def verboseprint(self, msg): if self.verbosity >= 2: + if isinstance(msg, unicode): + msg = unicode_to_stdout(msg) + print >>self.options.stdout, msg def warn(self, msg): print >>self.options.stderr, msg def process(self, localpath): + precondition(isinstance(localpath, unicode), localpath) # returns newdircap self.verboseprint("processing %s" % localpath) @@ -167,7 +176,7 @@ compare_contents = {} # childname -> rocap try: - children = os.listdir(localpath) + children = 
listdir_unicode(localpath) except EnvironmentError: self.directories_skipped += 1 self.warn("WARNING: permission denied on directory %s" % localpath) @@ -283,6 +292,8 @@ # This function will raise an IOError exception when called on an unreadable file def upload(self, childpath): + precondition(isinstance(childpath, unicode), childpath) + #self.verboseprint("uploading %s.." % childpath) metadata = get_local_metadata(childpath) @@ -291,7 +302,7 @@ if must_upload: self.verboseprint("uploading %s.." % childpath) - infileobj = open(os.path.expanduser(childpath), "rb") + infileobj = open_unicode(os.path.expanduser(childpath), "rb") url = self.options['node-url'] + "uri" resp = do_http("PUT", url, infileobj) if resp.status not in (200, 201): diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_cp.py new-tahoe-534/src/allmydata/scripts/tahoe_cp.py --- old-tahoe-534/src/allmydata/scripts/tahoe_cp.py 2010-05-17 09:57:49.313905408 +0200 +++ new-tahoe-534/src/allmydata/scripts/tahoe_cp.py 2010-05-17 09:57:49.646405047 +0200 @@ -2,12 +2,17 @@ import os.path import urllib import simplejson +import sys from cStringIO import StringIO from twisted.python.failure import Failure from allmydata.scripts.common import get_alias, escape_path, \ DefaultAliasMarker, UnknownAliasError from allmydata.scripts.common_http import do_http from allmydata import uri +from twisted.python import usage +from allmydata.util.stringutils import unicode_to_url, listdir_unicode, open_unicode +from allmydata.util.assertutil import precondition + def ascii_or_none(s): if s is None: @@ -70,6 +75,7 @@ class LocalFileSource: def __init__(self, pathname): + precondition(isinstance(pathname, unicode), pathname) self.pathname = pathname def need_to_copy_bytes(self): @@ -80,6 +86,7 @@ class LocalFileTarget: def __init__(self, pathname): + precondition(isinstance(pathname, unicode), pathname) self.pathname = pathname def put_file(self, inf): outf = open(self.pathname, "wb") @@ -92,6 +99,7 @@ class 
LocalMissingTarget: def __init__(self, pathname): + precondition(isinstance(pathname, unicode), pathname) self.pathname = pathname def put_file(self, inf): @@ -105,6 +113,8 @@ class LocalDirectorySource: def __init__(self, progressfunc, pathname): + precondition(isinstance(pathname, unicode), pathname) + self.progressfunc = progressfunc self.pathname = pathname self.children = None @@ -113,7 +123,7 @@ if self.children is not None: return self.children = {} - children = os.listdir(self.pathname) + children = listdir_unicode(self.pathname) for i,n in enumerate(children): self.progressfunc("examining %d of %d" % (i, len(children))) pn = os.path.join(self.pathname, n) @@ -130,6 +140,8 @@ class LocalDirectoryTarget: def __init__(self, progressfunc, pathname): + precondition(isinstance(pathname, unicode), pathname) + self.progressfunc = progressfunc self.pathname = pathname self.children = None @@ -138,7 +150,7 @@ if self.children is not None: return self.children = {} - children = os.listdir(self.pathname) + children = listdir_unicode(self.pathname) for i,n in enumerate(children): self.progressfunc("examining %d of %d" % (i, len(children))) pn = os.path.join(self.pathname, n) @@ -161,8 +173,9 @@ return LocalDirectoryTarget(self.progressfunc, pathname) def put_file(self, name, inf): + precondition(isinstance(name, unicode), name) pathname = os.path.join(self.pathname, name) - outf = open(pathname, "wb") + outf = open_unicode(pathname, "wb") while True: data = inf.read(32768) if not data: @@ -355,7 +368,7 @@ if self.writecap: url = self.nodeurl + "/".join(["uri", urllib.quote(self.writecap), - urllib.quote(name.encode('utf-8'))]) + urllib.quote(unicode_to_url(name))]) self.children[name] = TahoeFileTarget(self.nodeurl, mutable, writecap, readcap, url) elif data[0] == "dirnode": diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_ls.py new-tahoe-534/src/allmydata/scripts/tahoe_ls.py --- old-tahoe-534/src/allmydata/scripts/tahoe_ls.py 2010-05-17 09:57:49.313905408 +0200 
+++ new-tahoe-534/src/allmydata/scripts/tahoe_ls.py 2010-05-17 09:57:49.646405047 +0200 @@ -4,6 +4,7 @@ from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http +from allmydata.util.stringutils import unicode_to_stdout def list(options): nodeurl = options['node-url'] @@ -130,7 +131,7 @@ line.append(ctime_s) if not options["classify"]: classify = "" - line.append(name + classify) + line.append(unicode_to_stdout(name) + classify) if options["uri"]: line.append(uri) if options["readonly-uri"]: diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_manifest.py new-tahoe-534/src/allmydata/scripts/tahoe_manifest.py --- old-tahoe-534/src/allmydata/scripts/tahoe_manifest.py 2010-05-17 09:57:49.313905408 +0200 +++ new-tahoe-534/src/allmydata/scripts/tahoe_manifest.py 2010-05-17 09:57:49.646405047 +0200 @@ -85,7 +85,7 @@ try: print >>stdout, d["cap"], "/".join(d["path"]) except UnicodeEncodeError: - print >>stdout, d["cap"], "/".join([p.encode("utf-8") + print >>stdout, d["cap"], "/".join([unicode_to_stdout(p) for p in d["path"]]) def manifest(options): diff -rN -u old-tahoe-534/src/allmydata/scripts/tahoe_mkdir.py new-tahoe-534/src/allmydata/scripts/tahoe_mkdir.py --- old-tahoe-534/src/allmydata/scripts/tahoe_mkdir.py 2010-05-17 09:57:49.313905408 +0200 +++ new-tahoe-534/src/allmydata/scripts/tahoe_mkdir.py 2010-05-17 09:57:49.646405047 +0200 @@ -2,6 +2,7 @@ import urllib from allmydata.scripts.common_http import do_http, check_http_error from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, UnknownAliasError +from allmydata.util.stringutils import unicode_to_url def mkdir(options): nodeurl = options['node-url'] @@ -35,7 +36,7 @@ path = path[:-1] # path (in argv) must be "/".join([s.encode("utf-8") for s in segments]) url = nodeurl + "uri/%s/%s?t=mkdir" % (urllib.quote(rootcap), - urllib.quote(path)) + urllib.quote(unicode_to_url(path))) resp = do_http("POST", url) 
check_http_error(resp, stderr) new_uri = resp.read().strip() diff -rN -u old-tahoe-534/src/allmydata/test/test_cli.py new-tahoe-534/src/allmydata/test/test_cli.py --- old-tahoe-534/src/allmydata/test/test_cli.py 2010-05-17 09:57:49.293904649 +0200 +++ new-tahoe-534/src/allmydata/test/test_cli.py 2010-05-17 09:57:49.676405528 +0200 @@ -6,6 +6,7 @@ import urllib import re import simplejson +import sys from allmydata.util import fileutil, hashutil, base32 from allmydata import uri @@ -26,6 +27,9 @@ from twisted.internet import threads # CLI tests use deferToThread from twisted.python import usage +from allmydata.util.stringutils import listdir_unicode, open_unicode, \ + unicode_platform, FilenameEncodingError + timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s @@ -279,7 +283,7 @@ "work": "WA", "c": "CA"} def ga1(path): - return get_alias(aliases, path, "tahoe") + return get_alias(aliases, path, u"tahoe") uses_lettercolon = common.platform_uses_lettercolon_drivename() self.failUnlessEqual(ga1("bare"), ("TA", "bare")) self.failUnlessEqual(ga1("baredir/file"), ("TA", "baredir/file")) @@ -374,7 +378,7 @@ # default set to something that isn't in the aliases argument should # raise an UnknownAliasError. 
def ga4(path): - return get_alias(aliases, path, "badddefault:") + return get_alias(aliases, path, u"badddefault:") self.failUnlessRaises(common.UnknownAliasError, ga4, "afile") self.failUnlessRaises(common.UnknownAliasError, ga4, "a/dir/path/") @@ -382,12 +386,44 @@ old = common.pretend_platform_uses_lettercolon try: common.pretend_platform_uses_lettercolon = True - retval = get_alias(aliases, path, "baddefault:") + retval = get_alias(aliases, path, u"baddefault:") finally: common.pretend_platform_uses_lettercolon = old return retval self.failUnlessRaises(common.UnknownAliasError, ga5, "C:\\Windows") + def test_listdir_unicode_good(self): + basedir = u"cli/common/listdir_unicode_good" + fileutil.make_dirs(basedir) + + files = (u'Lôzane', u'Bern', u'Genève') + + for file in files: + open(os.path.join(basedir, file), "w").close() + + for file in listdir_unicode(basedir): + self.failUnlessEqual(file in files, True) + + def test_listdir_unicode_bad(self): + if unicode_platform(): + raise unittest.SkipTest("This test doesn't make any sense on architecture which handle filenames natively as Unicode entities.") + + basedir = u"cli/common/listdir_unicode_bad" + fileutil.make_dirs(basedir) + + files = (u'Lôzane', u'Bern', u'Genève') + + # We use a wrong encoding on purpose + if sys.getfilesystemencoding() == 'UTF-8': + encoding = 'latin1' + else: + encoding = 'UTF-8' + + for file in files: + path = os.path.join(basedir, file).encode(encoding) + open(path, "w").close() + + self.failUnlessRaises(FilenameEncodingError, listdir_unicode, basedir) class Help(unittest.TestCase): @@ -582,6 +618,48 @@ self.failUnless(aliases["un-corrupted2"].startswith("URI:DIR2:")) d.addCallback(_check_not_corrupted) + d.addCallback(lambda res: self.do_cli("create-alias", "études")) + def _check_create_unicode((rc,stdout,stderr)): + self.failUnlessEqual(rc, 0) + self.failIf(stderr) + + # If stdout only supports ascii, accentuated characters are + # being replaced by '?' 
+ if sys.stdout.encoding == "ANSI_X3.4-1968": + self.failUnless("Alias '?tudes' created" in stdout) + else: + self.failUnless("Alias 'études' created" in stdout) + + aliases = get_aliases(self.get_clientdir()) + self.failUnless(aliases[u"études"].startswith("URI:DIR2:")) + d.addCallback(_check_create_unicode) + + d.addCallback(lambda res: self.do_cli("ls", "études:")) + def _check_ls1((rc, stdout, stderr)): + self.failUnlessEqual(rc, 0) + self.failIf(stderr) + + self.failUnlessEqual(stdout, "") + d.addCallback(_check_ls1) + + d.addCallback(lambda res: self.do_cli("put", "-", "études:uploaded.txt", + stdin="Blah blah blah")) + + d.addCallback(lambda res: self.do_cli("ls", "études:")) + def _check_ls2((rc, stdout, stderr)): + self.failUnlessEqual(rc, 0) + self.failIf(stderr) + + self.failUnlessEqual(stdout, "uploaded.txt\n") + d.addCallback(_check_ls2) + + d.addCallback(lambda res: self.do_cli("get", "études:uploaded.txt")) + def _check_get((rc, stdout, stderr)): + self.failUnlessEqual(rc, 0) + self.failIf(stderr) + self.failUnlessEqual(stdout, "Blah blah blah") + d.addCallback(_check_get) + return d @@ -855,6 +933,37 @@ return d + def test_immutable_from_file_unicode(self): + # tahoe put file.txt "à trier.txt" + self.basedir = os.path.dirname(self.mktemp()) + self.set_up_grid() + + rel_fn = os.path.join(self.basedir, "DATAFILE") + abs_fn = os.path.abspath(rel_fn) + # we make the file small enough to fit in a LIT file, for speed + DATA = "short file" + f = open(rel_fn, "w") + f.write(DATA) + f.close() + + d = self.do_cli("create-alias", "tahoe") + + d.addCallback(lambda res: + self.do_cli("put", rel_fn, "à trier.txt")) + def _uploaded((rc,stdout,stderr)): + readcap = stdout.strip() + self.failUnless(readcap.startswith("URI:LIT:")) + self.failUnless("201 Created" in stderr, stderr) + self.readcap = readcap + d.addCallback(_uploaded) + + d.addCallback(lambda res: + self.do_cli("get", "tahoe:à trier.txt")) + d.addCallback(lambda (rc,stdout,stderr): + 
self.failUnlessEqual(stdout, DATA)) + + return d + class List(GridTestMixin, CLITestMixin, unittest.TestCase): def test_list(self): self.basedir = "cli/List/list" @@ -1138,30 +1247,37 @@ def test_unicode_filename(self): self.basedir = "cli/Cp/unicode_filename" self.set_up_grid() + d = self.do_cli("create-alias", "tahoe") + + # Use unicode strings when calling os functions + if sys.getfilesystemencoding() == "ANSI_X3.4-1968": + fn1 = os.path.join(self.basedir, u"Artonwall") + else: + fn1 = os.path.join(self.basedir, u"Ärtonwall") - fn1 = os.path.join(self.basedir, "Ärtonwall") DATA1 = "unicode file content" fileutil.write(fn1, DATA1) + d.addCallback(lambda res: self.do_cli("cp", fn1.encode('utf-8'), "tahoe:Ärtonwall")) + + d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall")) + d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1)) - fn2 = os.path.join(self.basedir, "Metallica") + + fn2 = os.path.join(self.basedir, u"Metallica") DATA2 = "non-unicode file content" fileutil.write(fn2, DATA2) # Bug #534 # Assure that uploading a file whose name contains unicode character doesn't # prevent further uploads in the same directory - d = self.do_cli("create-alias", "tahoe") - d.addCallback(lambda res: self.do_cli("cp", fn1, "tahoe:")) - d.addCallback(lambda res: self.do_cli("cp", fn2, "tahoe:")) - - d.addCallback(lambda res: self.do_cli("get", "tahoe:Ärtonwall")) - d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA1)) + d.addCallback(lambda res: self.do_cli("cp", fn2.encode('utf-8'), "tahoe:")) d.addCallback(lambda res: self.do_cli("get", "tahoe:Metallica")) d.addCallback(lambda (rc,out,err): self.failUnlessEqual(out, DATA2)) + d.addCallback(lambda res: self.do_cli("ls", "tahoe:")) + return d - test_unicode_filename.todo = "This behavior is not yet supported, although it does happen to work (for reasons that are ill-understood) on many platforms. See issue ticket #534." 
def test_dangling_symlink_vs_recursion(self): if not hasattr(os, 'symlink'): @@ -1268,6 +1384,17 @@ return d +class Mkdir(GridTestMixin, CLITestMixin, unittest.TestCase): + def test_unicode_mkdir(self): + self.basedir = os.path.dirname(self.mktemp()) + self.set_up_grid() + + d = self.do_cli("create-alias", "tahoe") + d.addCallback(lambda res: self.do_cli("mkdir", "tahoe:Motörhead")) + + return d + + class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase): def writeto(self, path, data):