comparison upreckon/files.py @ 195:c2490e39fd70

Revamped the implementation of files.Archive subclasses. They now normalize and sanitize all paths and provide a listdir method. TarArchive also ignores all files that are not regular files or directories.
author Oleg Oshmyan <chortos@inbox.lv>
date Sun, 14 Aug 2011 01:02:10 +0300
parents 8c30a2c8a09e
children 67088c1765b4
comparison
equal deleted inserted replaced
194:8c30a2c8a09e 195:c2490e39fd70
14 # all full stops will be converted to os.extsep on the fly 14 # all full stops will be converted to os.extsep on the fly
15 archives = 'tests.tar', 'tests.zip', 'tests.tgz', 'tests.tar.gz', 'tests.tbz2', 'tests.tar.bz2' 15 archives = 'tests.tar', 'tests.zip', 'tests.tgz', 'tests.tar.gz', 'tests.tbz2', 'tests.tar.bz2'
16 formats = {} 16 formats = {}
17 17
18 class Archive(object): 18 class Archive(object):
19 __slots__ = 'file' 19 __slots__ = ()
20 20
21 if ABCMeta: 21 if ABCMeta:
22 __metaclass__ = ABCMeta 22 __metaclass__ = ABCMeta
23 23
24 def __new__(cls, path): 24 def __new__(cls, path):
41 @abstractmethod 41 @abstractmethod
42 def __init__(self, path): raise NotImplementedError 42 def __init__(self, path): raise NotImplementedError
43 43
44 @abstractmethod 44 @abstractmethod
45 def extract(self, name, target): raise NotImplementedError 45 def extract(self, name, target): raise NotImplementedError
46
47 @abstractmethod
48 def open(self, name): raise NotImplementedError
49
50 @abstractmethod
51 def exists(self, name): raise NotImplementedError
52
53 @abstractmethod
54 def listdir(self, name): raise NotImplementedError
46 55
try:
    import tarfile
except ImportError:
    # tarfile is unavailable; keep the name defined so it can be tested.
    TarArchive = None
else:
    class TarArchive(Archive):
        """Archive implementation backed by the tarfile module.

        On construction every member is indexed under a sanitized POSIX
        path: the name is normalized and any leading slashes and leading
        '../' components are removed.  Only regular files and directories
        are indexed; members of any other type are ignored.
        """

        __slots__ = '_tarfile', '_files', '_dirs', '_names'

        def __init__(self, path):
            """Open the tar archive at path and index its members."""
            self._tarfile = tarfile.open(path)
            files, dirs = {}, set()
            for member in self._tarfile.getmembers():
                # Sanitize: normalize, then drop anything that would point
                # at the file system root or above the archive root.
                cutname = posixpath.normpath(member.name).lstrip('/')
                while cutname.startswith('../'):
                    cutname = cutname[3:]
                if cutname in ('.', '..'):
                    continue
                if member.isfile():
                    files[cutname] = member
                    # Only the ancestors of a file are directories.
                    cutname = posixpath.dirname(cutname)
                elif not member.isdir():
                    # Neither a regular file nor a directory: ignore it.
                    continue
                # Register the directory and every ancestor of it.
                while cutname:
                    dirs.add(cutname)
                    cutname = posixpath.dirname(cutname)
            self._files = files
            self._dirs = frozenset(dirs)
            self._names = self._dirs | frozenset(files)

        def extract(self, name, target):
            """Extract the regular file called name to the path target.

            Raises KeyError if name is not an indexed regular file.
            """
            member = self._files[posixpath.normpath(name)]
            # tarfile extracts to the member's own name, so the indexed
            # member object is renamed in place to redirect the output.
            member.name = target
            self._tarfile.extract(member)

        def open(self, name):
            """Return a file-like object for the regular file called name.

            Raises KeyError if name is not an indexed regular file.
            """
            name = posixpath.normpath(name)
            return self._tarfile.extractfile(self._files[name])

        def exists(self, name):
            """Tell whether name is an indexed file or directory."""
            return posixpath.normpath(name) in self._names

        def listdir(self, name):
            """List the immediate children of the directory called name.

            The returned names are full archive paths (they include the
            directory prefix) and come in no particular order.  The
            archive root may be given as '' or '.'.

            Raises KeyError if name is not an indexed directory.
            """
            normname = posixpath.normpath(name)
            if normname == '.':
                # The root is never stored in _dirs (normpath spells it
                # '.'), so it is special-cased: top-level entries are
                # exactly the names that contain no slash at all.
                prefix = ''
            elif normname in self._dirs:
                prefix = normname + '/'
            else:
                raise KeyError('No such directory: %r' % name)
            len_prefix = len(prefix)
            return [fname for fname in self._names
                    if fname.startswith(prefix) and
                       fname.find('/', len_prefix) == -1]

        def __enter__(self):
            """Enter the underlying TarFile's context if it has one."""
            if hasattr(self._tarfile, '__enter__'):
                self._tarfile.__enter__()
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            """Close the archive, emulating TarFile.__exit__ if it is absent."""
            if hasattr(self._tarfile, '__exit__'):
                return self._tarfile.__exit__(exc_type, exc_value, traceback)
            elif exc_type is None:
                self._tarfile.close()
            else:
                # This code was shamelessly copied from tarfile.py of Python 2.7
                if not self._tarfile._extfileobj:
                    self._tarfile.fileobj.close()
                self._tarfile.closed = True
92 122
93 formats['tar'] = formats['tgz'] = formats['tar.gz'] = formats['tbz2'] = formats['tar.bz2'] = TarArchive 123 formats['tar'] = formats['tgz'] = formats['tar.gz'] = formats['tbz2'] = formats['tar.bz2'] = TarArchive
94 124
try:
    import zipfile
except ImportError:
    # zipfile is unavailable; keep the name defined so it can be tested.
    ZipArchive = None
else:
    class ZipArchive(Archive):
        """Archive implementation backed by the zipfile module.

        On construction every member is indexed under a sanitized POSIX
        path: the name is normalized and any leading slashes and leading
        '../' components are removed.  A member whose stored name ends
        with '/' is treated as a directory, any other as a regular file.
        """

        __slots__ = '_zipfile', '_files', '_dirs', '_names'

        def __init__(self, path):
            """Open the zip archive at path and index its members."""
            self._zipfile = zipfile.ZipFile(path)
            files, dirs = {}, set()
            for member in self._zipfile.infolist():
                # Sanitize: normalize, then drop anything that would point
                # at the file system root or above the archive root.
                cutname = posixpath.normpath(member.filename).lstrip('/')
                while cutname.startswith('../'):
                    cutname = cutname[3:]
                if cutname in ('.', '..'):
                    continue
                if not member.filename.endswith('/'):
                    files[cutname] = member
                    # Only the ancestors of a file are directories.
                    cutname = posixpath.dirname(cutname)
                # Register the directory and every ancestor of it.
                while cutname:
                    dirs.add(cutname)
                    cutname = posixpath.dirname(cutname)
            self._files = files
            self._dirs = frozenset(dirs)
            self._names = self._dirs | frozenset(files)

        def extract(self, name, target):
            """Extract the regular file called name to the path target.

            Raises KeyError if name is not an indexed regular file.
            """
            member = self._files[posixpath.normpath(name)]
            # zipfile extracts to the member's own file name, so the
            # indexed member object is renamed in place to redirect the
            # output.
            # FIXME: 2.5 lacks ZipFile.extract
            if os.path.isabs(target):
                # To my knowledge, this is as portable as it gets
                path = os.path.join(os.path.splitdrive(target)[0], os.path.sep)
                member.filename = os.path.relpath(target, path)
                self._zipfile.extract(member, path)
            else:
                member.filename = os.path.relpath(target)
                self._zipfile.extract(member)

        def open(self, name):
            """Return a file-like object for the regular file called name.

            Raises KeyError if name is not an indexed regular file.
            """
            name = posixpath.normpath(name)
            # FIXME: 2.5 lacks ZipFile.open
            return self._zipfile.open(self._files[name])

        def exists(self, name):
            """Tell whether name is an indexed file or directory."""
            return posixpath.normpath(name) in self._names

        def listdir(self, name):
            """List the immediate children of the directory called name.

            The returned names are full archive paths (they include the
            directory prefix) and come in no particular order.  The
            archive root may be given as '' or '.'.

            Raises KeyError if name is not an indexed directory.
            """
            normname = posixpath.normpath(name)
            if normname == '.':
                # The root is never stored in _dirs (normpath spells it
                # '.'), so it is special-cased: top-level entries are
                # exactly the names that contain no slash at all.
                prefix = ''
            elif normname in self._dirs:
                prefix = normname + '/'
            else:
                raise KeyError('No such directory: %r' % name)
            len_prefix = len(prefix)
            return [fname for fname in self._names
                    if fname.startswith(prefix) and
                       fname.find('/', len_prefix) == -1]

        def __enter__(self):
            """Enter the underlying ZipFile's context if it has one."""
            if hasattr(self._zipfile, '__enter__'):
                self._zipfile.__enter__()
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            """Leave the ZipFile's context, or just close it if it has none."""
            if hasattr(self._zipfile, '__exit__'):
                return self._zipfile.__exit__(exc_type, exc_value, traceback)
            else:
                return self._zipfile.close()
142 192
143 formats['zip'] = ZipArchive 193 formats['zip'] = ZipArchive
144 194
145 # Remove unsupported archive formats and replace full stops 195 # Remove unsupported archive formats and replace full stops
146 # with the platform-dependent file name extension separator 196 # with the platform-dependent file name extension separator