Mercurial > ~astiob > upreckon > hgweb
comparison upreckon/files.py @ 195:c2490e39fd70
Revamped the implementation of files.Archive subclasses
They now normalize and sanitize all paths and provide a listdir method.
TarArchive also ignores all files that are not regular files or directories.
author | Oleg Oshmyan <chortos@inbox.lv> |
---|---|
date | Sun, 14 Aug 2011 01:02:10 +0300 |
parents | 8c30a2c8a09e |
children | 67088c1765b4 |
comparison
legend: equal | deleted | inserted | replaced
194:8c30a2c8a09e | 195:c2490e39fd70 |
---|---|
14 # all full stops will be converted to os.extsep on the fly | 14 # all full stops will be converted to os.extsep on the fly |
15 archives = 'tests.tar', 'tests.zip', 'tests.tgz', 'tests.tar.gz', 'tests.tbz2', 'tests.tar.bz2' | 15 archives = 'tests.tar', 'tests.zip', 'tests.tgz', 'tests.tar.gz', 'tests.tbz2', 'tests.tar.bz2' |
16 formats = {} | 16 formats = {} |
17 | 17 |
18 class Archive(object): | 18 class Archive(object): |
19 __slots__ = 'file' | 19 __slots__ = () |
20 | 20 |
21 if ABCMeta: | 21 if ABCMeta: |
22 __metaclass__ = ABCMeta | 22 __metaclass__ = ABCMeta |
23 | 23 |
24 def __new__(cls, path): | 24 def __new__(cls, path): |
41 @abstractmethod | 41 @abstractmethod |
42 def __init__(self, path): raise NotImplementedError | 42 def __init__(self, path): raise NotImplementedError |
43 | 43 |
44 @abstractmethod | 44 @abstractmethod |
45 def extract(self, name, target): raise NotImplementedError | 45 def extract(self, name, target): raise NotImplementedError |
46 | |
47 @abstractmethod | |
48 def open(self, name): raise NotImplementedError | |
49 | |
50 @abstractmethod | |
51 def exists(self, name): raise NotImplementedError | |
52 | |
53 @abstractmethod | |
54 def listdir(self, name): raise NotImplementedError | |
46 | 55 |
47 try: | 56 try: |
48 import tarfile | 57 import tarfile |
49 except ImportError: | 58 except ImportError: |
50 TarArchive = None | 59 TarArchive = None |
51 else: | 60 else: |
52 class TarArchive(Archive): | 61 class TarArchive(Archive): |
53 __slots__ = '_namelist' | 62 __slots__ = '_tarfile', '_files', '_dirs', '_names' |
54 | 63 |
55 def __init__(self, path): | 64 def __init__(self, path): |
56 self.file = tarfile.open(path) | 65 self._tarfile = tarfile.open(path) |
66 files, dirs = {}, set() | |
67 for member in self._tarfile.getmembers(): | |
68 cutname = posixpath.normpath(member.name).lstrip('/') | |
69 while cutname.startswith('../'): | |
70 cutname = cutname[3:] | |
71 if cutname in ('.', '..'): | |
72 continue | |
73 if member.isfile(): | |
74 files[cutname] = member | |
75 cutname = posixpath.dirname(cutname) | |
76 elif not member.isdir(): | |
77 continue | |
78 while cutname: | |
79 dirs.add(cutname) | |
80 cutname = posixpath.dirname(cutname) | |
81 self._files = files | |
82 self._dirs = frozenset(dirs) | |
83 self._names = self._dirs | frozenset(files) | |
57 | 84 |
58 def extract(self, name, target): | 85 def extract(self, name, target): |
59 member = self.file.getmember(name) | 86 member = self._files[posixpath.normpath(name)] |
60 member.name = target | 87 member.name = target |
61 self.file.extract(member) | 88 self._tarfile.extract(member) |
62 | 89 |
63 def open(self, name): | 90 def open(self, name): |
64 return self.file.extractfile(name) | 91 name = posixpath.normpath(name) |
65 | 92 return self._tarfile.extractfile(self._files[name]) |
66 def exists(self, queried_name): | 93 |
67 if not hasattr(self, '_namelist'): | 94 def exists(self, name): |
68 names = set() | 95 return posixpath.normpath(name) in self._names |
69 for name in self.file.getnames(): | 96 |
70 cutname = name | 97 def listdir(self, name): |
71 while cutname: | 98 normname = posixpath.normpath(name) |
72 names.add(cutname) | 99 if normname not in self._dirs: |
73 cutname = cutname.rpartition('/')[0] | 100 raise KeyError('No such directory: %r' % name) |
74 self._namelist = frozenset(names) | 101 normname += '/' |
75 return queried_name in self._namelist | 102 len_normname = len(normname) |
103 return [fname for fname in self._names | |
104 if fname.startswith(normname) and | |
105 fname.find('/', len_normname) == -1] | |
76 | 106 |
77 def __enter__(self): | 107 def __enter__(self): |
78 if hasattr(self.file, '__enter__'): | 108 if hasattr(self._tarfile, '__enter__'): |
79 self.file.__enter__() | 109 self._tarfile.__enter__() |
80 return self | 110 return self |
81 | 111 |
82 def __exit__(self, exc_type, exc_value, traceback): | 112 def __exit__(self, exc_type, exc_value, traceback): |
83 if hasattr(self.file, '__exit__'): | 113 if hasattr(self._tarfile, '__exit__'): |
84 return self.file.__exit__(exc_type, exc_value, traceback) | 114 return self._tarfile.__exit__(exc_type, exc_value, traceback) |
85 elif exc_type is None: | 115 elif exc_type is None: |
86 self.file.close() | 116 self._tarfile.close() |
87 else: | 117 else: |
88 # This code was shamelessly copied from tarfile.py of Python 2.7 | 118 # This code was shamelessly copied from tarfile.py of Python 2.7 |
89 if not self.file._extfileobj: | 119 if not self._tarfile._extfileobj: |
90 self.file.fileobj.close() | 120 self._tarfile.fileobj.close() |
91 self.file.closed = True | 121 self._tarfile.closed = True |
92 | 122 |
93 formats['tar'] = formats['tgz'] = formats['tar.gz'] = formats['tbz2'] = formats['tar.bz2'] = TarArchive | 123 formats['tar'] = formats['tgz'] = formats['tar.gz'] = formats['tbz2'] = formats['tar.bz2'] = TarArchive |
94 | 124 |
95 try: | 125 try: |
96 import zipfile | 126 import zipfile |
97 except ImportError: | 127 except ImportError: |
98 ZipArchive = None | 128 ZipArchive = None |
99 else: | 129 else: |
100 class ZipArchive(Archive): | 130 class ZipArchive(Archive): |
101 __slots__ = '_namelist' | 131 __slots__ = '_zipfile', '_files', '_dirs', '_names' |
102 | 132 |
103 def __init__(self, path): | 133 def __init__(self, path): |
104 self.file = zipfile.ZipFile(path) | 134 self._zipfile = zipfile.ZipFile(path) |
135 files, dirs = {}, set() | |
136 for member in self._zipfile.infolist(): | |
137 cutname = posixpath.normpath(member.filename).lstrip('/') | |
138 while cutname.startswith('../'): | |
139 cutname = cutname[3:] | |
140 if cutname in ('.', '..'): | |
141 continue | |
142 if not member.filename.endswith('/'): | |
143 files[cutname] = member | |
144 cutname = posixpath.dirname(cutname) | |
145 while cutname: | |
146 dirs.add(cutname) | |
147 cutname = posixpath.dirname(cutname) | |
148 self._files = files | |
149 self._dirs = frozenset(dirs) | |
150 self._names = self._dirs | frozenset(files) | |
105 | 151 |
106 def extract(self, name, target): | 152 def extract(self, name, target): |
107 member = self.file.getinfo(name) | 153 member = self._files[posixpath.normpath(name)] |
108 # FIXME: 2.5 lacks ZipFile.extract | 154 # FIXME: 2.5 lacks ZipFile.extract |
109 if os.path.isabs(target): | 155 if os.path.isabs(target): |
110 # To my knowledge, this is as portable as it gets | 156 # To my knowledge, this is as portable as it gets |
111 path = os.path.join(os.path.splitdrive(target)[0], os.path.sep) | 157 path = os.path.join(os.path.splitdrive(target)[0], os.path.sep) |
112 member.filename = os.path.relpath(target, path) | 158 member.filename = os.path.relpath(target, path) |
113 self.file.extract(member, path) | 159 self._zipfile.extract(member, path) |
114 else: | 160 else: |
115 member.filename = os.path.relpath(target) | 161 member.filename = os.path.relpath(target) |
116 self.file.extract(member) | 162 self._zipfile.extract(member) |
117 | 163 |
118 def open(self, name): | 164 def open(self, name): |
119 return self.file.open(name, 'r') | 165 name = posixpath.normpath(name) |
120 | 166 # FIXME: 2.5 lacks ZipFile.open |
121 def exists(self, queried_name): | 167 return self._zipfile.open(self._files[name]) |
122 if not hasattr(self, '_namelist'): | 168 |
123 names = set() | 169 def exists(self, name): |
124 for name in self.file.namelist(): | 170 return posixpath.normpath(name) in self._names |
125 cutname = name | 171 |
126 while cutname: | 172 def listdir(self, name): |
127 names.add(cutname) | 173 normname = posixpath.normpath(name) |
128 cutname = cutname.rpartition('/')[0] | 174 if normname not in self._dirs: |
129 self._namelist = frozenset(names) | 175 raise KeyError('No such directory: %r' % name) |
130 return queried_name in self._namelist | 176 normname += '/' |
177 len_normname = len(normname) | |
178 return [fname for fname in self._names | |
179 if fname.startswith(normname) and | |
180 fname.find('/', len_normname) == -1] | |
131 | 181 |
132 def __enter__(self): | 182 def __enter__(self): |
133 if hasattr(self.file, '__enter__'): | 183 if hasattr(self._zipfile, '__enter__'): |
134 self.file.__enter__() | 184 self._zipfile.__enter__() |
135 return self | 185 return self |
136 | 186 |
137 def __exit__(self, exc_type, exc_value, traceback): | 187 def __exit__(self, exc_type, exc_value, traceback): |
138 if hasattr(self.file, '__exit__'): | 188 if hasattr(self._zipfile, '__exit__'): |
139 return self.file.__exit__(exc_type, exc_value, traceback) | 189 return self._zipfile.__exit__(exc_type, exc_value, traceback) |
140 else: | 190 else: |
141 return self.file.close() | 191 return self._zipfile.close() |
142 | 192 |
143 formats['zip'] = ZipArchive | 193 formats['zip'] = ZipArchive |
144 | 194 |
145 # Remove unsupported archive formats and replace full stops | 195 # Remove unsupported archive formats and replace full stops |
146 # with the platform-dependent file name extension separator | 196 # with the platform-dependent file name extension separator |