Mercurial > ~astiob > upreckon > hgweb
annotate zipfiles/zipfile27.py @ 246:1bc89faac941 2.04
Fixed: match='re' could produce duplicate test identifiers
files.Files.regexp(pattern) now makes sure to return only one
metafile for each matching virtual path, namely, the one that would
be returned for that virtual path by files.Files.from_virtual_path.
author | Oleg Oshmyan <chortos@inbox.lv> |
---|---|
date | Thu, 03 Oct 2013 01:19:09 +0300 |
parents | 45d4a9dc707b |
children |
rev | line source |
---|---|
21 | 1 """ |
2 Read and write ZIP files. | |
3 """ | |
4 # Improved by Chortos-2 in 2010 (added bzip2 support) | |
5 import struct, os, time, sys, shutil | |
6 import binascii, cStringIO, stat | |
7 import io | |
8 import re | |
9 | |
10 try: | |
11 import zlib # We may need its compression method | |
12 crc32 = zlib.crc32 | |
13 except ImportError: | |
14 zlib = None | |
15 crc32 = binascii.crc32 | |
16 | |
17 try: | |
18 import bz2 # We may need its compression method | |
19 except ImportError: | |
20 bz2 = None | |
21 | |
22 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile", | |
23 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", "ZIP_BZIP2" ] | |
24 | |
class BadZipfile(Exception):
    """Raised for archives this module cannot process.

    Within this file it is raised for a CRC-32 mismatch while reading a
    member and for archives that span multiple disks.
    """
    pass
27 | |
28 | |
class LargeZipFile(Exception):
    """
    Raised when writing a zipfile that requires ZIP64 extensions
    while those extensions are disabled.
    """
34 | |
# Alias: `error` and BadZipfile name the same exception class.
error = BadZipfile      # The exception raised by this module

# Largest size stored directly in a 32-bit header field by this module;
# ZipInfo.FileHeader() falls back to the ZIP64 extra field above this value.
ZIP64_LIMIT = (1 << 31) - 1
# NOTE(review): presumably the entry count from which ZIP64 records become
# necessary -- its use is not visible in this part of the file.
ZIP_FILECOUNT_LIMIT = 1 << 16
# Largest value representable in a 16-bit length field such as the archive
# comment size (the trailing 'H' of structEndArchive).
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
# Other ZIP compression methods not supported
46 | |
47 # Below are some formats and associated data for reading/writing headers using | |
48 # the struct module. The names and structures of headers/records are those used | |
49 # in the PKWARE description of the ZIP file format: | |
50 # http://www.pkware.com/documents/casestudies/APPNOTE.TXT | |
51 # (URL valid as of January 2008) | |
52 | |
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
# Layout (little-endian, standard sizes): 4-byte signature, four 16-bit
# fields, two 32-bit fields and one 16-bit field.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the unpacked fields inside the end-record list.
_ECD_SIGNATURE = 0
_ECD_DISK_NUMBER = 1
_ECD_DISK_START = 2
_ECD_ENTRIES_THIS_DISK = 3
_ECD_ENTRIES_TOTAL = 4
_ECD_SIZE = 5
_ECD_OFFSET = 6
_ECD_COMMENT_SIZE = 7
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
# (_EndRecData appends the comment and the record's file offset).
_ECD_COMMENT = 8
_ECD_LOCATION = 9
71 | |
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
# The format string unpacks into 19 little-endian fields, matching the
# indices 0..18 listed below.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
_CD_CREATE_VERSION = 1
_CD_CREATE_SYSTEM = 2
_CD_EXTRACT_VERSION = 3
_CD_EXTRACT_SYSTEM = 4
_CD_FLAG_BITS = 5
_CD_COMPRESS_TYPE = 6
_CD_TIME = 7
_CD_DATE = 8
_CD_CRC = 9
_CD_COMPRESSED_SIZE = 10
_CD_UNCOMPRESSED_SIZE = 11
_CD_FILENAME_LENGTH = 12
_CD_EXTRA_FIELD_LENGTH = 13
_CD_COMMENT_LENGTH = 14
_CD_DISK_NUMBER_START = 15
_CD_INTERNAL_FILE_ATTRIBUTES = 16
_CD_EXTERNAL_FILE_ATTRIBUTES = 17
_CD_LOCAL_HEADER_OFFSET = 18
98 | |
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
# The format string unpacks into 12 little-endian fields, matching the
# indices 0..11 listed below; ZipInfo.FileHeader() packs with it.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the unpacked fields of a local file header.
_FH_SIGNATURE = 0
_FH_EXTRACT_VERSION = 1
_FH_EXTRACT_SYSTEM = 2
_FH_GENERAL_PURPOSE_FLAG_BITS = 3
_FH_COMPRESSION_METHOD = 4
_FH_LAST_MOD_TIME = 5
_FH_LAST_MOD_DATE = 6
_FH_CRC = 7
_FH_COMPRESSED_SIZE = 8
_FH_UNCOMPRESSED_SIZE = 9
_FH_FILENAME_LENGTH = 10
_FH_EXTRA_FIELD_LENGTH = 11
117 | |
# The "Zip64 end of central directory locator" structure, magic number, and size
# Unpacked by _EndRecData64 as (signature, disk number, relative offset,
# number of disks).
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
# The format string unpacks into 10 little-endian fields, matching the
# indices 0..9 listed below.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the unpacked fields of the ZIP64 end-of-central-directory record.
_CD64_SIGNATURE = 0
_CD64_DIRECTORY_RECSIZE = 1
_CD64_CREATE_VERSION = 2
_CD64_EXTRACT_VERSION = 3
_CD64_DISK_NUMBER = 4
_CD64_DISK_NUMBER_START = 5
_CD64_NUMBER_ENTRIES_THIS_DISK = 6
_CD64_NUMBER_ENTRIES_TOTAL = 7
_CD64_DIRECTORY_SIZE = 8
_CD64_OFFSET_START_CENTDIR = 9
139 | |
def _check_zipfile(fp):
    """Tell whether the open file *fp* ends with a usable ZIP end record.

    An IOError raised while probing the file counts as "not a ZIP file".
    """
    try:
        # file has correct magic number when an end record is found
        found = bool(_EndRecData(fp))
    except IOError:
        found = False
    return found
147 | |
def is_zipfile(filename):
    """Report whether *filename* looks like a ZIP archive.

    *filename* is either a path or an object with a ``read`` attribute,
    which is then used directly as the file.  Paths are opened in binary
    mode and closed again.  An IOError (for example a path that cannot be
    opened) yields False.
    """
    try:
        if hasattr(filename, "read"):
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except IOError:
        return False
163 | |
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin   -- seekable file object holding the archive.
    offset -- position of the classic "end of central directory" record,
              given relative to the end of the file (as passed to
              seek(..., 2)).
    endrec -- end-record list built by _EndRecData(); it is modified in
              place and returned.

    endrec is returned unchanged when the ZIP64 locator or the ZIP64 record
    is missing, truncated or carries the wrong signature.  BadZipfile is
    raised for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read (e.g. an in-memory file that clamps the seek): there is
        # no complete locator, and struct.unpack would raise struct.error.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Truncated ZIP64 record: keep the classic end record.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
201 | |
202 | |
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when no complete record whose comment length matches
    the rest of the file can be found at the end of *fpin*."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length test guards struct.unpack against file objects that clamp
    # an out-of-range seek instead of raising IOError.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = struct.unpack(structEndArchive, data)
        endrec = list(endrec)

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # Signature too close to the end of the file: the record is
            # truncated and cannot be unpacked.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
259 | |
260 | |
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create a record for *filename* filled with standard values.

        filename  -- member name; it is cut at the first NUL byte and
                     os.sep is converted to "/".  The unmodified value is
                     kept in orig_filename.
        date_time -- (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When a size exceeds ZIP64_LIMIT a ZIP64 extra field is appended to
        the returned header and extract_version/create_version are raised
        to at least 45 on this object.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value; it used to be
            # derived from extract_version by mistake.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode name that is not pure ASCII is encoded as UTF-8 and the
        0x800 flag bit is added; byte-string names are returned untouched.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 when flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 values found in the extra field to this record.

        Each extra sub-field is a 4-byte (type, length) header followed by
        its data.  For type 1 (ZIP64) the stored 64-bit numbers replace, in
        order, file_size, compress_size and header_offset -- but only those
        currently holding the 0xffffffff placeholder.  Raises RuntimeError
        when a ZIP64 sub-field has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Fewer than 4 bytes cannot hold another sub-field header; stop
        # instead of letting unpack() fail on trailing padding.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
408 | |
409 | |
410 class _ZipDecrypter: | |
411 """Class to handle decryption of files stored within a ZIP archive. | |
412 | |
413 ZIP supports a password-based form of encryption. Even though known | |
414 plaintext attacks have been found against it, it is still useful | |
415 to be able to get data out of such a file. | |
416 | |
417 Usage: | |
418 zd = _ZipDecrypter(mypwd) | |
419 plain_char = zd(cypher_char) | |
420 plain_text = map(zd, cypher_text) | |
421 """ | |
422 | |
423 def _GenerateCRCTable(): | |
424 """Generate a CRC-32 table. | |
425 | |
426 ZIP encryption uses the CRC32 one-byte primitive for scrambling some | |
427 internal keys. We noticed that a direct implementation is faster than | |
428 relying on binascii.crc32(). | |
429 """ | |
430 poly = 0xedb88320 | |
431 table = [0] * 256 | |
432 for i in range(256): | |
433 crc = i | |
434 for j in range(8): | |
435 if crc & 1: | |
436 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly | |
437 else: | |
438 crc = ((crc >> 1) & 0x7FFFFFFF) | |
439 table[i] = crc | |
440 return table | |
441 crctable = _GenerateCRCTable() | |
442 | |
443 def _crc32(self, ch, crc): | |
444 """Compute the CRC32 primitive on one byte.""" | |
445 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff] | |
446 | |
447 def __init__(self, pwd): | |
448 self.key0 = 305419896 | |
449 self.key1 = 591751049 | |
450 self.key2 = 878082192 | |
451 for p in pwd: | |
452 self._UpdateKeys(p) | |
453 | |
454 def _UpdateKeys(self, c): | |
455 self.key0 = self._crc32(c, self.key0) | |
456 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295 | |
457 self.key1 = (self.key1 * 134775813 + 1) & 4294967295 | |
458 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2) | |
459 | |
460 def __call__(self, c): | |
461 """Decrypt a single character.""" | |
462 c = ord(c) | |
463 k = self.key2 | 2 | |
464 c = c ^ (((k * (k^1)) >> 8) & 255) | |
465 c = chr(c) | |
466 self._UpdateKeys(c) | |
467 return c | |
468 | |
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().

    Data is pulled from the underlying file object on demand, optionally
    decrypted, decompressed according to the member's compression type and
    kept in an internal read buffer.  When the member's ZipInfo carries a
    CRC, a running CRC-32 is compared with it once the last byte has been
    produced and BadZipfile is raised on mismatch.
    """

    # Max size supported by decompressor.
    # Parenthesised on purpose: "1 << 31 - 1" would evaluate to 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Prepare to read the member described by *zipinfo* from *fileobj*.

        fileobj   -- file object to read the (compressed) member data from.
        mode      -- mode string; a 'U' in it enables universal newlines
                     in readline().
        zipinfo   -- ZipInfo of the member (compress_type, compress_size,
                     filename and, if present, CRC are used).
        decrypter -- optional callable applied to every byte read.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        elif self._compress_type == ZIP_BZIP2:
            self._decompressor = bz2.BZ2Decompressor()
            # Per-instance override of the class-level block size.
            self.MIN_READ_SIZE = 900000
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Fill the buffer, then rewind past what read() consumed.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: objects of this class can always be read from."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold *newdata* into the running CRC; verify it when *eof* is true.

        Raises BadZipfile if the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # Test for None first so that None is never compared with 0.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0
        elif (len(self._unconsumed) > 0 and n > len_readbuffer and
              self._compress_type == ZIP_BZIP2):
            # All pending input is handed to the decompressor in one call.
            data = self._decompressor.decompress(self._unconsumed)

            self._unconsumed = ''
            # Verify the CRC like the other compression types do; nothing
            # is left to decompress once the compressed input is exhausted.
            self._update_crc(data, eof=(self._compress_left == 0))
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
666 | |
667 | |
668 | |
669 class ZipFile: | |
670 """ Class with methods to open, read, write, close, list zip files. | |
671 | |
672 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False) | |
673 | |
674 file: Either the path to the file, or a file-like object. | |
675 If it is a path, the file will be opened and closed by ZipFile. | |
676 mode: The mode can be either read "r", write "w" or append "a". | |
677 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib), | |
678 or ZIP_BZIP2 (requires bz2). | |
679 allowZip64: if True ZipFile will create files with ZIP64 extensions when | |
680 needed, otherwise it will raise an exception when this would | |
681 be necessary. | |
682 | |
683 """ | |
684 | |
685 fp = None # Set here since __del__ checks it | |
686 | |
687 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False): | |
688 """Open the ZIP file with mode read "r", write "w" or append "a".""" | |
689 if mode not in ("r", "w", "a"): | |
690 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"') | |
691 | |
692 if compression == ZIP_STORED: | |
693 pass | |
694 elif compression == ZIP_DEFLATED: | |
695 if not zlib: | |
696 raise RuntimeError,\ | |
697 "Compression requires the (missing) zlib module" | |
698 elif compression == ZIP_BZIP2: | |
699 if not bz2: | |
700 raise RuntimeError,\ | |
701 "Compression requires the (missing) bz2 module" | |
702 else: | |
703 raise RuntimeError, "That compression method is not supported" | |
704 | |
705 self._allowZip64 = allowZip64 | |
706 self._didModify = False | |
707 self.debug = 0 # Level of printing: 0 through 3 | |
708 self.NameToInfo = {} # Find file info given name | |
709 self.filelist = [] # List of ZipInfo instances for archive | |
710 self.compression = compression # Method of compression | |
711 self.mode = key = mode.replace('b', '')[0] | |
712 self.pwd = None | |
713 self.comment = '' | |
714 | |
715 # Check if we were passed a file-like object | |
716 if isinstance(file, basestring): | |
717 self._filePassed = 0 | |
718 self.filename = file | |
719 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'} | |
720 try: | |
721 self.fp = open(file, modeDict[mode]) | |
722 except IOError: | |
723 if mode == 'a': | |
724 mode = key = 'w' | |
725 self.fp = open(file, modeDict[mode]) | |
726 else: | |
727 raise | |
728 else: | |
729 self._filePassed = 1 | |
730 self.fp = file | |
731 self.filename = getattr(file, 'name', None) | |
732 | |
733 if key == 'r': | |
734 self._GetContents() | |
735 elif key == 'w': | |
32
3000bb94addb
Updated zipfile to 2.7.1 and 3.1.3 final releases.
Oleg Oshmyan <chortos@inbox.lv>
parents:
29
diff
changeset
|
736 # set the modified flag so central directory gets written |
3000bb94addb
Updated zipfile to 2.7.1 and 3.1.3 final releases.
Oleg Oshmyan <chortos@inbox.lv>
parents:
29
diff
changeset
|
737 # even if no files are added to the archive |
3000bb94addb
Updated zipfile to 2.7.1 and 3.1.3 final releases.
Oleg Oshmyan <chortos@inbox.lv>
parents:
29
diff
changeset
|
738 self._didModify = True |
21 | 739 elif key == 'a': |
32
3000bb94addb
Updated zipfile to 2.7.1 and 3.1.3 final releases.
Oleg Oshmyan <chortos@inbox.lv>
parents:
29
diff
changeset
|
740 try: |
3000bb94addb
Updated zipfile to 2.7.1 and 3.1.3 final releases.
Oleg Oshmyan <chortos@inbox.lv>
parents:
29
diff
changeset
|
741 # See if file is a zip file |
21 | 742 self._RealGetContents() |
743 # seek to start of directory and overwrite | |
744 self.fp.seek(self.start_dir, 0) | |
32
3000bb94addb
Updated zipfile to 2.7.1 and 3.1.3 final releases.
Oleg Oshmyan <chortos@inbox.lv>
parents:
29
diff
changeset
|
745 except BadZipfile: |
3000bb94addb
Updated zipfile to 2.7.1 and 3.1.3 final releases.
Oleg Oshmyan <chortos@inbox.lv>
parents:
29
diff
changeset
|
746 # file is not a zip file, just append |
21 | 747 self.fp.seek(0, 2) |
32
3000bb94addb
Updated zipfile to 2.7.1 and 3.1.3 final releases.
Oleg Oshmyan <chortos@inbox.lv>
parents:
29
diff
changeset
|
748 |
3000bb94addb
Updated zipfile to 2.7.1 and 3.1.3 final releases.
Oleg Oshmyan <chortos@inbox.lv>
parents:
29
diff
changeset
|
749 # set the modified flag so central directory gets written |
3000bb94addb
Updated zipfile to 2.7.1 and 3.1.3 final releases.
Oleg Oshmyan <chortos@inbox.lv>
parents:
29
diff
changeset
|
750 # even if no files are added to the archive |
3000bb94addb
Updated zipfile to 2.7.1 and 3.1.3 final releases.
Oleg Oshmyan <chortos@inbox.lv>
parents:
29
diff
changeset
|
751 self._didModify = True |
21 | 752 else: |
753 if not self._filePassed: | |
754 self.fp.close() | |
755 self.fp = None | |
756 raise RuntimeError, 'Mode must be "r", "w" or "a"' | |
757 | |
def __enter__(self):
    """Enter a ``with`` block; the archive itself is the bound value."""
    return self
760 | |
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    The exception details are ignored and nothing is returned, so an
    exception raised inside the block keeps propagating after close().
    """
    self.close()
763 | |
764 def _GetContents(self): | |
765 """Read the directory, making sure we close the file if the format | |
766 is bad.""" | |
767 try: | |
768 self._RealGetContents() | |
769 except BadZipfile: | |
770 if not self._filePassed: | |
771 self.fp.close() | |
772 self.fp = None | |
773 raise | |
774 | |
775 def _RealGetContents(self): | |
776 """Read in the table of contents for the ZIP file.""" | |
777 fp = self.fp | |
32
3000bb94addb
Updated zipfile to 2.7.1 and 3.1.3 final releases.
Oleg Oshmyan <chortos@inbox.lv>
parents:
29
diff
changeset
|
778 try: |
3000bb94addb
Updated zipfile to 2.7.1 and 3.1.3 final releases.
Oleg Oshmyan <chortos@inbox.lv>
parents:
29
diff
changeset
|
779 endrec = _EndRecData(fp) |
3000bb94addb
Updated zipfile to 2.7.1 and 3.1.3 final releases.
Oleg Oshmyan <chortos@inbox.lv>
parents:
29
diff
changeset
|
780 except IOError: |
3000bb94addb
Updated zipfile to 2.7.1 and 3.1.3 final releases.
Oleg Oshmyan <chortos@inbox.lv>
parents:
29
diff
changeset
|
781 raise BadZipfile("File is not a zip file") |
21 | 782 if not endrec: |
783 raise BadZipfile, "File is not a zip file" | |
784 if self.debug > 1: | |
785 print endrec | |
786 size_cd = endrec[_ECD_SIZE] # bytes in central directory | |
787 offset_cd = endrec[_ECD_OFFSET] # offset of central directory | |
788 self.comment = endrec[_ECD_COMMENT] # archive comment | |
789 | |
790 # "concat" is zero, unless zip was concatenated to another file | |
791 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd | |
792 if endrec[_ECD_SIGNATURE] == stringEndArchive64: | |
793 # If Zip64 extension structures are present, account for them | |
794 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator) | |
795 | |
796 if self.debug > 2: | |
797 inferred = concat + offset_cd | |
798 print "given, inferred, offset", offset_cd, inferred, concat | |
799 # self.start_dir: Position of start of central directory | |
800 self.start_dir = offset_cd + concat | |
801 fp.seek(self.start_dir, 0) | |
802 data = fp.read(size_cd) | |
803 fp = cStringIO.StringIO(data) | |
804 total = 0 | |
805 while total < size_cd: | |
806 centdir = fp.read(sizeCentralDir) | |
807 if centdir[0:4] != stringCentralDir: | |
808 raise BadZipfile, "Bad magic number for central directory" | |
809 centdir = struct.unpack(structCentralDir, centdir) | |
810 if self.debug > 2: | |
811 print centdir | |
812 filename = fp.read(centdir[_CD_FILENAME_LENGTH]) | |
813 # Create ZipInfo instance to store file information | |
814 x = ZipInfo(filename) | |
815 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH]) | |
816 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH]) | |
817 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] | |
818 (x.create_version, x.create_system, x.extract_version, x.reserved, | |
819 x.flag_bits, x.compress_type, t, d, | |
820 x.CRC, x.compress_size, x.file_size) = centdir[1:12] | |
821 x.volume, x.internal_attr, x.external_attr = centdir[15:18] | |
822 # Convert date/time code to (year, month, day, hour, min, sec) | |
823 x._raw_time = t | |
824 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F, | |
825 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 ) | |
826 | |
827 x._decodeExtra() | |
828 x.header_offset = x.header_offset + concat | |
829 x.filename = x._decodeFilename() | |
830 self.filelist.append(x) | |
831 self.NameToInfo[x.filename] = x | |
832 | |
833 # update total bytes read from central directory | |
834 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH] | |
835 + centdir[_CD_EXTRA_FIELD_LENGTH] | |
836 + centdir[_CD_COMMENT_LENGTH]) | |
837 | |
838 if self.debug > 2: | |
839 print "total", total | |
840 | |
841 | |
def namelist(self):
    """Return a new list with the name of every archive member, in
    directory order."""
    return [entry.filename for entry in self.filelist]
848 | |
def infolist(self):
    """Return a list of class ZipInfo instances for files in the
    archive."""
    # This is the archive's own internal list object, not a copy.
    return self.filelist
853 | |
def printdir(self):
    """Print a table of contents for the zip file.

    Writes a header line followed by one line per member with its name,
    modification time and uncompressed size to standard output.
    """
    # NOTE(review): padding inside the "Modified" header literal may have
    # been collapsed when this listing was captured -- confirm against the
    # repository copy before relying on column alignment.
    print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    for zinfo in self.filelist:
        # date_time is (year, month, day, hour, minute, second).
        date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
860 | |
861 def testzip(self): | |
862 """Read all the files and check the CRC.""" | |
863 chunk_size = 2 ** 20 | |
864 for zinfo in self.filelist: | |
865 try: | |
866 # Read by chunks, to avoid an OverflowError or a | |
867 # MemoryError with very large embedded files. | |
868 f = self.open(zinfo.filename, "r") | |
869 while f.read(chunk_size): # Check CRC-32 | |
870 pass | |
871 except BadZipfile: | |
872 return zinfo.filename | |
873 | |
def getinfo(self, name):
    """Look up the ZipInfo registered under 'name'.

    Raises KeyError when the archive has no member with that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
882 | |
def setpassword(self, pwd):
    """Set default password for encrypted files."""
    # open() falls back to this value when it is called without a password.
    self.pwd = pwd
886 | |
def read(self, name, pwd=None):
    """Return the complete contents of member 'name' as a string.

    'pwd' is handed to open() unchanged.
    """
    member_stream = self.open(name, "r", pwd)
    return member_stream.read()
890 | |
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    'name' is a member name or a ZipInfo instance, 'mode' must be "r",
    "U" or "rU", and 'pwd' is the password for an encrypted member
    (self.pwd is used when it is not given).

    Raises RuntimeError for a bad mode, a closed archive, or a missing or
    wrong password; KeyError (from getinfo) for an unknown member name;
    BadZipfile when the member's local header is truncated, has a bad
    signature, or names a different file than the central directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
        should_close = False
    else:
        zef_file = open(self.filename, 'rb')
        should_close = True

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        # A short read would otherwise reach struct.unpack and raise
        # struct.error instead of BadZipfile.
        if len(fheader) != sizeFileHeader:
            raise BadZipfile("Truncated file header")
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            crypt_header = zef_file.read(12)
            h = map(zd, crypt_header[0:12])
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd)
    except BaseException:
        # Do not leak the handle opened above when the member cannot be
        # opened; a caller-supplied file object is never closed here.
        if should_close:
            zef_file.close()
        raise
959 | |
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member' may be a filename or a ZipInfo object.  The member is
    created under `path', or under the current working directory when
    `path' is None, using its full name.  `pwd' is the password used
    for an encrypted member.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
973 | |
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each of them.

    `path' names the directory to extract to (the current working
    directory when None).  `members' is optional and must be a subset
    of the list returned by namelist(); all members are extracted when
    it is None.  `pwd' is passed on to extract().
    """
    selected = self.namelist() if members is None else members
    for item in selected:
        self.extract(item, path, pwd)
985 | |
986 def _extract_member(self, member, targetpath, pwd): | |
987 """Extract the ZipInfo object 'member' to a physical | |
988 file on the path targetpath. | |
989 """ | |
990 # build the destination pathname, replacing | |
991 # forward slashes to platform specific separators. | |
992 # Strip trailing path separator, unless it represents the root. | |
993 if (targetpath[-1:] in (os.path.sep, os.path.altsep) | |
994 and len(os.path.splitdrive(targetpath)[1]) > 1): | |
995 targetpath = targetpath[:-1] | |
996 | |
997 # don't include leading "/" from file name if present | |
998 if member.filename[0] == '/': | |
999 targetpath = os.path.join(targetpath, member.filename[1:]) | |
1000 else: | |
1001 targetpath = os.path.join(targetpath, member.filename) | |
1002 | |
1003 targetpath = os.path.normpath(targetpath) | |
1004 | |
1005 # Create all upper directories if necessary. | |
1006 upperdirs = os.path.dirname(targetpath) | |
1007 if upperdirs and not os.path.exists(upperdirs): | |
1008 os.makedirs(upperdirs) | |
1009 | |
1010 if member.filename[-1] == '/': | |
1011 if not os.path.isdir(targetpath): | |
1012 os.mkdir(targetpath) | |
1013 return targetpath | |
1014 | |
1015 source = self.open(member, pwd=pwd) | |
1016 target = file(targetpath, "wb") | |
1017 shutil.copyfileobj(source, target) | |
1018 source.close() | |
1019 target.close() | |
1020 | |
1021 return targetpath | |
1022 | |
1023 def _writecheck(self, zinfo): | |
1024 """Check for errors before writing a file to the archive.""" | |
1025 if zinfo.filename in self.NameToInfo: | |
1026 if self.debug: # Warning for duplicate names | |
1027 print "Duplicate name:", zinfo.filename | |
1028 if self.mode not in ("w", "a"): | |
1029 raise RuntimeError, 'write() requires mode "w" or "a"' | |
1030 if not self.fp: | |
1031 raise RuntimeError, \ | |
1032 "Attempt to write ZIP archive that was already closed" | |
1033 if zinfo.compress_type == ZIP_DEFLATED and not zlib: | |
1034 raise RuntimeError, \ | |
1035 "Compression requires the (missing) zlib module" | |
1036 if zinfo.compress_type == ZIP_BZIP2 and not bz2: | |
1037 raise RuntimeError, \ | |
1038 "Compression requires the (missing) bz2 module" | |
1039 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2): | |
1040 raise RuntimeError, \ | |
1041 "That compression method is not supported" | |
1042 if zinfo.file_size > ZIP64_LIMIT: | |
1043 if not self._allowZip64: | |
1044 raise LargeZipFile("Filesize would require ZIP64 extensions") | |
1045 if zinfo.header_offset > ZIP64_LIMIT: | |
1046 if not self._allowZip64: | |
1047 raise LargeZipFile("Zipfile size would require ZIP64 extensions") | |
1048 | |
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    'arcname' defaults to 'filename'; its drive prefix and leading path
    separators are removed.  'compress_type' overrides the archive's
    default compression method for this member.  A directory is stored
    as an entry whose name ends in "/" and that has no data.

    Raises RuntimeError when the archive is already closed; _writecheck()
    may raise RuntimeError or LargeZipFile.
    """
    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    # Archive names are stored without a leading separator.
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directory entry: header only, no data follows.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as fp:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        # Placeholder header; the real CRC and sizes are patched in below.
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        elif zinfo.compress_type == ZIP_BZIP2:
            cmpr = bz2.BZ2Compressor()
        else:
            cmpr = None
        # Stream the source in 8 KiB pieces, updating size and CRC as we go.
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    if cmpr:
        # Emit whatever the compressor is still buffering.
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    # NOTE(review): +14 is presumably the offset of the CRC field inside
    # the local file header -- confirm against structFileHeader.
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
          zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1131 | |
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When only a name is given, a ZipInfo stamped with the current local
    time, the archive's default compression and external_attr
    0600 << 16 is created.  'compress_type', when not None, overrides
    the compression method in either case.

    Raises RuntimeError when the archive is already closed; _writecheck()
    may raise RuntimeError or LargeZipFile.
    """
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])

        zinfo.compress_type = self.compression
        zinfo.external_attr = 0600 << 16
    else:
        zinfo = zinfo_or_arcname

    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    # The CRC is computed over the uncompressed data, before 'bytes' is
    # rebound to the compressed form below.
    zinfo.CRC = crc32(bytes) & 0xffffffff       # CRC-32 checksum
    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
             zlib.DEFLATED, -15)
        bytes = co.compress(bytes) + co.flush()
        zinfo.compress_size = len(bytes)    # Compressed size
    elif zinfo.compress_type == ZIP_BZIP2:
        co = bz2.BZ2Compressor()
        bytes = co.compress(bytes) + co.flush()
        zinfo.compress_size = len(bytes)    # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size
    # Same value as assigned above: nothing has been written in between.
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self.fp.write(zinfo.FileHeader())
    self.fp.write(bytes)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1178 | |
def __del__(self):
    """Call the "close()" method in case the user forgot."""
    # close() returns immediately when self.fp is already None, so this is
    # a no-op for archives that were closed explicitly.
    self.close()
1182 | |
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    When the archive was modified, the central directory and the
    end-of-archive record are written first; ZIP64 end records are added
    when the entry count reaches ZIP_FILECOUNT_LIMIT or the directory
    size or offset exceeds ZIP64_LIMIT.  An archive comment longer than
    ZIP_MAX_COMMENT is truncated.

    The file handle is always released (and closed, unless the caller
    supplied the file object) and self.fp is reset to None, even when
    writing the ending records fails.  Calling close() again is a no-op.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    # Real sizes go into the ZIP64 extra field; the fixed
                    # fields carry the 0xffffffff marker instead.
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                            '<HH' + 'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the offending field values before re-raising.
                    print >>sys.stderr, (structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                centDirOffset > ZIP64_LIMIT or
                centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, centDirCount, centDirCount,
                        centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length; a comment of exactly
            # ZIP_MAX_COMMENT bytes still fits and is left alone
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    msg = 'Archive comment is too long; truncating to %d bytes' \
                          % ZIP_MAX_COMMENT
                    print(msg)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Release the handle even when writing the records failed, so a
        # later close() (e.g. from __del__) does not retry the write.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1292 | |
1293 | |
1294 class PyZipFile(ZipFile): | |
1295 """Class to create ZIP archives with Python library files and packages.""" | |
1296 | |
1297 def writepy(self, pathname, basename = ""): | |
1298 """Add all files from "pathname" to the ZIP archive. | |
1299 | |
1300 If pathname is a package directory, search the directory and | |
1301 all package subdirectories recursively for all *.py and enter | |
1302 the modules into the archive. If pathname is a plain | |
1303 directory, listdir *.py and enter all modules. Else, pathname | |
1304 must be a Python *.py file and the module will be put into the | |
1305 archive. Added modules are always module.pyo or module.pyc. | |
1306 This method will compile the module.py into module.pyc if | |
1307 necessary. | |
1308 """ | |
1309 dir, name = os.path.split(pathname) | |
1310 if os.path.isdir(pathname): | |
1311 initname = os.path.join(pathname, "__init__.py") | |
1312 if os.path.isfile(initname): | |
1313 # This is a package directory, add it | |
1314 if basename: | |
1315 basename = "%s/%s" % (basename, name) | |
1316 else: | |
1317 basename = name | |
1318 if self.debug: | |
1319 print "Adding package in", pathname, "as", basename | |
1320 fname, arcname = self._get_codename(initname[0:-3], basename) | |
1321 if self.debug: | |
1322 print "Adding", arcname | |
1323 self.write(fname, arcname) | |
1324 dirlist = os.listdir(pathname) | |
1325 dirlist.remove("__init__.py") | |
1326 # Add all *.py files and package subdirectories | |
1327 for filename in dirlist: | |
1328 path = os.path.join(pathname, filename) | |
1329 root, ext = os.path.splitext(filename) | |
1330 if os.path.isdir(path): | |
1331 if os.path.isfile(os.path.join(path, "__init__.py")): | |
1332 # This is a package directory, add it | |
1333 self.writepy(path, basename) # Recursive call | |
1334 elif ext == ".py": | |
1335 fname, arcname = self._get_codename(path[0:-3], | |
1336 basename) | |
1337 if self.debug: | |
1338 print "Adding", arcname | |
1339 self.write(fname, arcname) | |
1340 else: | |
1341 # This is NOT a package directory, add its files at top level | |
1342 if self.debug: | |
1343 print "Adding files from directory", pathname | |
1344 for filename in os.listdir(pathname): | |
1345 path = os.path.join(pathname, filename) | |
1346 root, ext = os.path.splitext(filename) | |
1347 if ext == ".py": | |
1348 fname, arcname = self._get_codename(path[0:-3], | |
1349 basename) | |
1350 if self.debug: | |
1351 print "Adding", arcname | |
1352 self.write(fname, arcname) | |
1353 else: | |
1354 if pathname[-3:] != ".py": | |
1355 raise RuntimeError, \ | |
1356 'Files added with writepy() must end with ".py"' | |
1357 fname, arcname = self._get_codename(pathname[0:-3], basename) | |
1358 if self.debug: | |
1359 print "Adding file", arcname | |
1360 self.write(fname, arcname) | |
1361 | |
1362 def _get_codename(self, pathname, basename): | |
1363 """Return (filename, archivename) for the path. | |
1364 | |
1365 Given a module name path, return the correct file path and | |
1366 archive name, compiling if necessary. For example, given | |
1367 /python/lib/string, return (/python/lib/string.pyc, string). | |
1368 """ | |
1369 file_py = pathname + ".py" | |
1370 file_pyc = pathname + ".pyc" | |
1371 file_pyo = pathname + ".pyo" | |
1372 if os.path.isfile(file_pyo) and \ | |
1373 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime: | |
1374 fname = file_pyo # Use .pyo file | |
1375 elif not os.path.isfile(file_pyc) or \ | |
1376 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime: | |
1377 import py_compile | |
1378 if self.debug: | |
1379 print "Compiling", file_py | |
1380 try: | |
1381 py_compile.compile(file_py, file_pyc, None, True) | |
1382 except py_compile.PyCompileError,err: | |
1383 print err.msg | |
1384 fname = file_pyc | |
1385 else: | |
1386 fname = file_pyc | |
1387 archivename = os.path.split(fname)[1] | |
1388 if basename: | |
1389 archivename = "%s/%s" % (basename, archivename) | |
1390 return (fname, archivename) | |
1391 | |
1392 | |
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    'args' is the argument list without the program name and defaults to
    sys.argv[1:].  The first argument selects the action (-l, -t, -e or
    -c).  When the action is unknown or the number of arguments does not
    fit it, the usage text is printed and the process exits with status 1.

    Every archive opened here is closed again, even when the action
    fails part-way.
    """
    import textwrap
    # NOTE(review): any column alignment inside this usage text may have
    # been collapsed when the listing was captured -- confirm against the
    # repository copy.
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)

    def usage_error():
        # Print the usage text and terminate with a failure status.
        print(USAGE)
        sys.exit(1)

    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_error()

    if args[0] == '-l':
        if len(args) != 2:
            usage_error()
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            usage_error()
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            usage_error()

        zf = ZipFile(args[1], 'r')
        out = args[2]
        try:
            # NOTE(review): member names are joined to 'out' as stored in
            # the archive; ".." components are not filtered here.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: create it instead of trying to
                    # open a path ending in "/" for writing.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # dirname is '' for a bare file name; makedirs('') fails.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            usage_error()

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else is
            # ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1466 | |
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()