# A wrapper around the Python Standard Library's filename access functions to
# provide a uniform API for all platforms and to prevent lossy en/de-coding.

class Fname:
    """A filename held as a unicode string plus a record of how it was decoded.

    Attributes:
        name: the filename as a unicode object.
        failed_decode: True when ``name`` was not produced by a successful
            decode with the filesystem's alleged encoding, but by stuffing the
            raw bytes into a unicode object via the utf-8b trick.
        alleged_encoding: the encoding the filesystem claimed to use when the
            name was read, or None when not recorded.
    """

    def __init__(self, name, failed_decode=False, alleged_encoding=None):
        self.name = name
        self.failed_decode = failed_decode
        self.alleged_encoding = alleged_encoding


if platform.system() in ('Linux', 'Solaris'):
    # on byte-oriented filesystems, such as Linux and Solaris

    def _effective_fs_encoding():
        """Return the codec name to use for filenames on this filesystem.

        A missing, empty, 'ascii' or 'utf-8' filesystem encoding is replaced by
        'utf-8b' so that arbitrary bytes can be carried through unicode.
        """
        # NOTE(review): the comparison is case-sensitive and does not resolve
        # codec aliases; sys.getfilesystemencoding() may report e.g. 'UTF-8'.
        # Confirm whether the name should be normalized before comparing.
        fsencoding = sys.getfilesystemencoding()
        if fsencoding in (None, '', 'ascii', 'utf-8'):
            return 'utf-8b'
        return fsencoding

    def unicode_to_fs(fn):
        """
        Encode the unicode name of an Fname to filesystem bytes.

        Raises usage.UsageError when the name cannot be encoded using the
        filesystem encoding.
        """
        precondition(isinstance(fn, Fname), fn)
        precondition(isinstance(fn.name, unicode), fn.name)

        if fn.failed_decode:
            # This means that the unicode string in .name is not actually the
            # result of a successful decoding with a suggested codec, but is
            # instead the result of stuffing the bytes into a unicode by dint
            # of the utf-8b trick. This means that on a byte-oriented system,
            # you shouldn't treat the .name as a unicode string containing
            # chars, but instead you should get the original bytes back out of
            # it.
            # NOTE(review): this uses the 'python-replace' error handler while
            # every other call here uses 'python-escape'; neither is a stdlib
            # handler, so the difference is left as found -- confirm intent.
            return fn.name.encode('utf-8b', 'python-replace')

        fsencoding = _effective_fs_encoding()
        try:
            return fn.name.encode(fsencoding, 'python-escape')
        except UnicodeEncodeError:
            raise usage.UsageError(
                "Filename '%s' cannot be encoded using "
                "the current encoding of your filesystem (%s). "
                "Please configure your locale "
                "correctly or rename this file."
                % (fn.name, sys.getfilesystemencoding()))

    def fs_to_unicode(bytesfn):
        """
        Decode bytes from the filesystem to an Fname.

        The bytes are first decoded strictly with the filesystem's alleged
        encoding. If that fails, they are stuffed into a unicode object with
        utf-8b and the result is flagged with failed_decode=True so that
        unicode_to_fs() recovers the original bytes instead of re-encoding.
        """
        precondition(isinstance(bytesfn, str), bytesfn)
        alleged_encoding = _effective_fs_encoding()

        try:
            unicodefn = bytesfn.decode(alleged_encoding, 'strict')
        except UnicodeDecodeError:
            # Decoding raises UnicodeDecodeError (not UnicodeEncodeError).
            unicodefn = bytesfn.decode('utf-8b', 'python-escape')
            return Fname(unicodefn, failed_decode=True,
                         alleged_encoding=alleged_encoding)

        unicodefn = unicodedata.normalize('NFC', unicodefn)
        if alleged_encoding == 'utf-8b':
            return Fname(unicodefn)
        # Pass by keyword: the second positional parameter of Fname is
        # failed_decode, not alleged_encoding.
        return Fname(unicodefn, alleged_encoding=alleged_encoding)

    def listdir(fn):
        """List the directory named by the Fname fn as a list of Fname objects."""
        assert isinstance(fn, Fname), fn
        assert isinstance(fn.name, unicode), fn.name

        # unicode_to_fs() takes the Fname itself, not its .name.
        bytesfn = unicode_to_fs(fn)
        # Distinct loop variable: in Python 2 a list-comprehension variable
        # leaks into the enclosing scope and would rebind fn.
        return [fs_to_unicode(entry) for entry in os.listdir(bytesfn)]

else:
    # on unicode-oriented filesystems, such as Mac and Windows

    def listdir(fn):
        """List the directory named by the Fname fn as a list of Fname objects."""
        assert isinstance(fn, Fname), fn
        assert isinstance(fn.name, unicode), fn.name

        return [Fname(n) for n in os.listdir(fn.name)]