comparison zipfile32.py @ 97:bbf9c434fa57

Added zipfile-with-bzip2 implementation for Python 3.2
author Oleg Oshmyan <chortos@inbox.lv>
date Wed, 02 Mar 2011 19:00:57 +0000
parents
children
comparison
equal deleted inserted replaced
96:c3afa2b0c14c 97:bbf9c434fa57
1 """
2 Read and write ZIP files.
3
4 XXX references to utf-8 need further investigation.
5 """
6 import io
7 import os
8 import re
9 import imp
10 import sys
11 import time
12 import stat
13 import shutil
14 import struct
15 import binascii
16
17
18 try:
19 import zlib # We may need its compression method
20 crc32 = zlib.crc32
21 except ImportError:
22 zlib = None
23 crc32 = binascii.crc32
24
25 try:
26 import bz2 # We may need its compression method
27 except ImportError:
28 bz2 = None
29
30 __all__ = ["BadZipFile", "BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED",
31 "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile",
32 "ZIP_BZIP2"]
33
class BadZipFile(Exception):
    """Raised when an archive is malformed or is not a ZIP file at all."""


class LargeZipFile(Exception):
    """
    Raised while writing an archive that would need the ZIP64 extensions
    when those extensions have not been enabled.
    """


# Names kept for code written against Python versions before 3.2.
BadZipfile = BadZipFile
error = BadZipFile
45
46
# Thresholds of the classic (non-ZIP64) archive format.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# Compression method identifiers handled by this module; every other ZIP
# compression method is unsupported.
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12

# The remaining constants describe the on-disk records as struct formats,
# together with each record's magic number, packed size and field indices.
# Record names and layouts follow the PKWARE description of the ZIP format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# "End of central directory" record (section V.I of the format document).
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 # The last two are not fields of the record as defined in the spec; this
 # module appends them to the unpacked list as a convenience.
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)

# "Central directory" file header (section V.F of the format document).
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# Field positions inside an unpacked central directory header.
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# "Local file header" (section V.A of the format document).
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)

# "Zip64 end of central directory locator".
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# "Zip64 end of central directory" record (section V.G of the format
# document).
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
149
def _check_zipfile(fp):
    """Return True when an end-of-central-directory record is found in fp.

    An IOError raised while probing the stream is treated as "not a ZIP
    file" and yields False.
    """
    try:
        endrec = _EndRecData(fp)
    except IOError:
        return False
    # A non-empty record list means the magic number was located.
    return bool(endrec)
157
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path, or an object with a ``read``
    attribute, in which case it is probed directly.  Any IOError (including
    failure to open the path) results in False.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except IOError:
        pass
    return verdict
173
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- seekable binary stream holding the archive.
    offset -- position of the classic end-of-central-directory record,
              expressed relative to the end of the file (a negative number).
    endrec -- list built from the classic record; it is updated in place
              when valid ZIP64 records are found.

    Returns endrec, updated or untouched.  Raises BadZipFile for archives
    that span several disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # A short read cannot hold a locator; unpacking it would raise
        # struct.error instead of reporting "no ZIP64 data".
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipFile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    (sig, sz, create_version, read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
211
212
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list holding the unpacked fields of the ZIP "End of
    central dir" record, followed by the archive comment and, as the last
    item, the file offset at which the record starts.  The list may have
    been updated from ZIP64 records by _EndRecData64().  None is returned
    when no valid record can be found."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length test keeps a too-short tail away from struct.unpack.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # complete record to follow it: the archive is corrupted.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
269
270
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named filename, stamped with date_time.

        date_time is a (year, month, day, hour, minute, second) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file (local) header as bytes.

        The result is the packed fixed-size header followed by the encoded
        file name and the extra field.  When a size exceeds ZIP64_LIMIT a
        ZIP64 extra record is appended and extract_version/create_version
        are raised to at least 45 on this object.
        """
        dt = self.date_time
        # Pack the timestamp into the MS-DOS date and time words; seconds
        # are stored with a two-second resolution.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        ASCII is used when the name allows it; otherwise the name is
        encoded as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 records found in the extra field.

        Sizes and the header offset that still hold their 32-bit
        placeholder value are replaced by the 64-bit values stored in
        records of type 1.  Raises RuntimeError for a type 1 record with an
        unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; a shorter
        # trailing fragment cannot be a record and is ignored rather than
        # handed to struct.unpack.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
409
410
class _ZipDecrypter:
    """Decrypter for the password-based encryption of ZIP archive members.

    Known plaintext attacks exist against this scheme, but being able to
    read data out of such files is still useful.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Build the 256-entry CRC-32 lookup table.

        The keys of the ZIP encryption are scrambled with the one-byte
        CRC32 primitive; a direct table implementation was found to be
        faster than going through binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for value in range(256):
            for _ in range(8):
                if value & 1:
                    value = (value >> 1) ^ poly
                else:
                    value >>= 1
            table.append(value)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[(crc ^ ch) & 0xff]

    def __init__(self, pwd):
        """Seed the three keys and mix in every byte of the password."""
        self.key0, self.key1, self.key2 = 0x12345678, 0x23456789, 0x34567890
        for byte in pwd:
            self._UpdateKeys(byte)

    def _UpdateKeys(self, c):
        """Advance the three keys with one plaintext byte value."""
        self.key0 = self._crc32(c, self.key0)
        summed = (self.key1 + (self.key0 & 0xff)) & 0xffffffff
        self.key1 = (summed * 0x08088405 + 1) & 0xffffffff
        self.key2 = self._crc32((self.key1 >> 24) & 0xff, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        plain = c ^ (((k * (k ^ 1)) >> 8) & 0xff)
        self._UpdateKeys(plain)
        return plain
468
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses matter:
    # ``1 << 31 - 1`` would be parsed as ``1 << 30``.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(br'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
                 close_fileobj=False):
        """Prepare to read the member described by zipinfo from fileobj.

        fileobj       -- binary stream already positioned at the member data.
        mode          -- open mode; universal newlines are enabled when it
                         contains 'U'.
        zipinfo       -- entry whose compress_type, compress_size, filename
                         and (if present) CRC are used.
        decrypter     -- optional callable mapping one encrypted byte value
                         to its plain value.
        close_fileobj -- close fileobj when this object is closed.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        elif self._compress_type == ZIP_BZIP2:
            self._decompressor = bz2.BZ2Decompressor()
            # Instance-level override of the class default read size.
            self.MIN_READ_SIZE = 900000
        self._unconsumed = b''

        self._readbuffer = b''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find(b'\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = b''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == b'':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + b'\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Fill the buffer through read(), then step back over what was
            # just consumed so the position is unchanged.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Report that this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and
        returned until EOF is reached.
        """
        buf = b''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC and verify it when eof is true.

        Raises BadZipFile when the final CRC differs from the expected one.
        """
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipFile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call.

        A negative n, or None, means "as much as possible".
        """

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None has to be tested first: ``None < 0`` raises TypeError.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = bytes(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare compressed bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
                self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0
        elif (len(self._unconsumed) > 0 and n > len_readbuffer and
                self._compress_type == ZIP_BZIP2):
            # NOTE(review): on this path the CRC is only verified when
            # decompress() raises EOFError while compressed bytes are still
            # pending; a member that decompresses normally never has its
            # CRC compared.  Behaviour kept as found - confirm the intent.
            try:
                data = self._decompressor.decompress(self._unconsumed)
            except EOFError:
                eof = self._compress_left
                data = b''
            else:
                eof = False
            self._unconsumed = b''

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data

    def close(self):
        """Close this object, and the underlying file if it is owned."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super().close()
682
683
684 class ZipFile:
685 """ Class with methods to open, read, write, close, list zip files.
686
687 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
688
689 file: Either the path to the file, or a file-like object.
690 If it is a path, the file will be opened and closed by ZipFile.
691 mode: The mode can be either read "r", write "w" or append "a".
692 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib)
693 or ZIP_BZIP2 (requires bz2).
694 allowZip64: if True ZipFile will create files with ZIP64 extensions when
695 needed, otherwise it will raise an exception when this would
696 be necessary.
697
698 """
699
700 fp = None # Set here since __del__ checks it
701
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file may be a path (opened and owned by this object) or an already
    open file-like object.  Raises RuntimeError for an unknown mode or
    for a compression method that is unsupported or whose module is
    missing.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    # Validate the requested method; only ZIP_STORED needs no module.
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    key = mode.replace('b', '')[0]

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key
    self.pwd = None
    self.comment = b''

    if isinstance(file, str):
        # A path was given: open the file ourselves.
        self._filePassed = 0
        self.filename = file
        open_modes = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, open_modes[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending to a file that cannot be opened for update falls
            # back to writing a new one.
            mode = key = 'w'
            self.fp = io.open(file, open_modes[mode])
    else:
        # A file-like object was given: use it as is.
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    if key == 'r':
        self._GetContents()
    elif key == 'w':
        # set the modified flag so central directory gets written
        # even if no files are added to the archive
        self._didModify = True
    elif key == 'a':
        try:
            # An existing archive: position on the start of its central
            # directory so that new data overwrites it.
            self._RealGetContents()
            self.fp.seek(self.start_dir, 0)
        except BadZipFile:
            # file is not a zip file, just append
            self.fp.seek(0, 2)

            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
    else:
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
773
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
776
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    Nothing is returned, so an exception raised inside the block keeps
    propagating.
    """
    self.close()
779
def _GetContents(self):
    """Load the archive directory, releasing the file on a bad format.

    When _RealGetContents() raises BadZipFile and the file was opened by
    this object, the file is closed and self.fp is reset before the
    exception is re-raised.
    """
    try:
        self._RealGetContents()
    except BadZipFile:
        owns_file = not self._filePassed
        if owns_file:
            self.fp.close()
            self.fp = None
        raise
790
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.comment, self.start_dir, self.filelist and self.NameToInfo
    from the central directory.  Raises BadZipFile when the end record
    cannot be found or a directory entry is damaged or truncated.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the archive file.
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # A record cut short would otherwise surface as struct.error.
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[_CD_FLAG_BITS]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                        t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
862
863
def namelist(self):
    """Return a new list with the file name of every archive member."""
    return [entry.filename for entry in self.filelist]
870
def infolist(self):
    """Return the list of ZipInfo instances, one per archive member.

    The internal list object itself is returned, not a copy.
    """
    members = self.filelist
    return members
875
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is written, then one line per member with its name,
    modification time and uncompressed size.  Output goes to `file`,
    which is passed straight to print().
    """
    heading = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(heading, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
884
885 def testzip(self):
886 """Read all the files and check the CRC."""
887 chunk_size = 2 ** 20
888 for zinfo in self.filelist:
889 try:
890 # Read by chunks, to avoid an OverflowError or a
891 # MemoryError with very large embedded files.
892 f = self.open(zinfo.filename, "r")
893 while f.read(chunk_size): # Check CRC-32
894 pass
895 except BadZipFile:
896 return zinfo.filename
897
def getinfo(self, name):
    """Return the ZipInfo instance registered under 'name'.

    Raises KeyError when the archive has no such member.
    """
    info = self.NameToInfo.get(name)
    if info is not None:
        return info
    raise KeyError(
        'There is no item named %r in the archive' % name)
906
def setpassword(self, pwd):
    """Set default password for encrypted files.

    A false value (None or empty bytes) clears the default.  Raises
    TypeError for a non-empty value that is not bytes.
    """
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    self.pwd = pwd if pwd else None
915
def read(self, name, pwd=None):
    """Return the complete contents of member 'name'.

    The member is opened in mode "r" with the given password, read to
    the end and closed again.
    """
    with self.open(name, "r", pwd) as member:
        payload = member.read()
    return payload
920
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name -- member name or ZipInfo instance.
    mode -- "r", "U" or "rU".
    pwd  -- password (bytes) for an encrypted member; the archive default
            is used when omitted.

    Raises RuntimeError for a bad mode, a closed archive, a missing or
    wrong password; TypeError for a non-bytes password; KeyError for an
    unknown member; BadZipFile for a damaged local header.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if pwd and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd))
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    # From here on any failure must release a privately opened file, so
    # the whole lookup is guarded by a single cleanup handler.
    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipFile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # Would otherwise surface as struct.error.
            raise BadZipFile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if zinfo.flag_bits & 0x800:
            # UTF-8 filename
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipFile(
                'File name in directory %r and header %r differ.'
                % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            header = zef_file.read(12)
            h = list(map(zd, header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=not self._filePassed)
    except BaseException:
        if not self._filePassed:
            zef_file.close()
        raise
1008
def extract(self, member, path=None, pwd=None):
    """Extract one archive member and return the path it was written to.

    `member' is either an archive name or a ZipInfo object; a name is
    resolved through getinfo().  The member is created below `path' using
    its full name; when `path' is None the current working directory is
    used instead.  `pwd' is handed on unchanged to the extraction helper.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
1022
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members of the archive, one extract() call each.

    `path' names the directory to extract to (extract() falls back to the
    current working directory when it is None).  `members' is optional and
    must be a subset of the list returned by namelist(); when it is None
    every name from namelist() is extracted.  `pwd' is passed to extract().
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
1034
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical file below the
    directory 'targetpath' and return the path that was created.

    Missing parent directories are created.  A member whose name ends
    with '/' is treated as a directory entry: the directory is created
    (if needed) and no data is copied.  'pwd' is passed to open().
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    arcname = member.filename

    # Build the destination pathname; don't include a leading "/" from
    # the file name if present.  startswith()/endswith() are used rather
    # than indexing so that an empty member name cannot raise IndexError.
    if arcname.startswith('/'):
        targetpath = os.path.join(targetpath, arcname[1:])
    else:
        targetpath = os.path.join(targetpath, arcname)

    # normpath() converts forward slashes to the platform separator.
    # NOTE(review): member names are not sanitised here; a name containing
    # ".." components can normalise to a location outside the requested
    # directory.  Callers handling untrusted archives should validate names.
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    if arcname.endswith('/'):
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    # Context managers guarantee both file objects are closed even when
    # opening the target or copying the data fails part-way through.
    with self.open(member, pwd=pwd) as source:
        with open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)

    return targetpath
1071
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    A duplicate name is only reported (printed) when self.debug is set;
    it is not an error.  Raises RuntimeError when the archive is not in
    mode "w" or "a", has already been closed, or when the requested
    compression method is unsupported or its module is missing.  Raises
    LargeZipFile when the file size or header offset exceeds ZIP64_LIMIT
    while ZIP64 extensions are disabled.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)

    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")

    method = zinfo.compress_type
    if method == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if method == ZIP_BZIP2 and not bz2:
        raise RuntimeError(
            "Compression requires the (missing) bz2 module")
    if method not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2):
        raise RuntimeError("That compression method is not supported")

    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
1097
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory `filename' to the archive.

    The entry is stored under `arcname' (default: `filename') after the
    drive letter and any leading path separators have been removed.
    `compress_type' overrides the archive's default compression for this
    entry.  A directory is recorded as an empty entry whose name ends
    with '/'.  Raises RuntimeError if the archive is already closed.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Work out the name used inside the archive.
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'

    # Create ZipInfo instance to store file information
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directory entries carry a header only, no data.
        zinfo.file_size = zinfo.compress_size = zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as src:
        # The CRC and sizes are unknown until the data has been streamed,
        # so a placeholder header is written now and patched afterwards.
        checksum = packed_total = raw_total = 0
        zinfo.CRC = zinfo.compress_size = zinfo.file_size = 0
        self.fp.write(zinfo.FileHeader())

        if zinfo.compress_type == ZIP_DEFLATED:
            compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                          zlib.DEFLATED, -15)
        elif zinfo.compress_type == ZIP_BZIP2:
            compressor = bz2.BZ2Compressor()
        else:
            compressor = None

        for chunk in iter(lambda: src.read(1024 * 8), b""):
            raw_total += len(chunk)
            checksum = crc32(chunk, checksum) & 0xffffffff
            if compressor:
                chunk = compressor.compress(chunk)
                packed_total += len(chunk)
            self.fp.write(chunk)

    if compressor:
        tail = compressor.flush()
        packed_total += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_total
    else:
        zinfo.compress_size = raw_total
    zinfo.CRC = checksum
    zinfo.file_size = raw_total

    # Seek backwards and write CRC and file sizes over the placeholders
    # (presumably offset 14 is where FileHeader() put those three fields),
    # then return to the end of the data just written.
    resume_at = self.fp.tell()
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(resume_at, 0)

    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1180
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write in-memory `data' into the archive as one entry.

    `data' may be a 'str' or a 'bytes' instance; a 'str' is encoded as
    UTF-8 first.  `zinfo_or_arcname' is either a ZipInfo instance or the
    name of the file in the archive; for a plain name a new ZipInfo is
    created with the current local time, the archive's default
    compression and external_attr 0o600 << 16.  `compress_type', when
    given, overrides the compression recorded on the entry.
    Raises RuntimeError if the archive is already closed.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    if compress_type is not None:
        zinfo.compress_type = compress_type

    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum

    # Pick the compressor matching the entry's method (None means stored).
    if zinfo.compress_type == ZIP_DEFLATED:
        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                      zlib.DEFLATED, -15)
    elif zinfo.compress_type == ZIP_BZIP2:
        compressor = bz2.BZ2Compressor()
    else:
        compressor = None

    if compressor is None:
        zinfo.compress_size = zinfo.file_size
    else:
        data = compressor.compress(data) + compressor.flush()
        zinfo.compress_size = len(data)     # Compressed size

    zinfo.header_offset = self.fp.tell()    # Start of header data
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))

    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1230
def __del__(self):
    """Finalizer: run close() on behalf of a caller who never did."""
    self.close()
1234
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Calling close() on an already closed archive does nothing.  When the
    archive was modified, the central directory and the end-of-archive
    record are written first; the ZIP64 end-of-archive record and locator
    are added when the entry count reaches ZIP_FILECOUNT_LIMIT or the
    central directory size/offset exceeds ZIP64_LIMIT.  An archive comment
    longer than ZIP_MAX_COMMENT is truncated.

    The underlying file is released (and self.fp reset to None) even if
    writing the records raises, so a failed close() is not retried on a
    half-written file by __del__.  A file object passed in by the caller
    is never closed here.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count += 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)

                # Values too large for the 32-bit fields are moved into a
                # ZIP64 extra field and replaced by 0xffffffff markers.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Debugging aid: dump the values being packed before
                    # re-raising.
                    print((structCentralDir, stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic record can only hold the clamped values.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length; a comment of exactly
            # ZIP_MAX_COMMENT bytes still fits and needs no truncation.
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    msg = 'Archive comment is too long; truncating to %d bytes' \
                          % ZIP_MAX_COMMENT
                    print(msg)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Always detach from the file so a failure above neither leaks the
        # handle nor triggers a second attempt from __del__.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1343
1344
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def __init__(self, file, mode="r", compression=ZIP_STORED,
                 allowZip64=False, optimize=-1):
        """Open the archive like ZipFile and remember `optimize'.

        `optimize' selects which compiled file writepy() stores:
        -1 (the default) uses whatever up-to-date compiled file is present,
        compiling only when none is; 0 stores the .pyc file; any other
        value stores the .pyo file, compiled with that optimization level
        when it is missing or older than the source.
        """
        ZipFile.__init__(self, file, mode=mode, compression=compression,
                         allowZip64=allowZip64)
        self._optimize = optimize

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.  If compilation fails the .py source is added instead.

        Raises RuntimeError when pathname is a file not ending in ".py".
        """
        name = os.path.split(pathname)[1]
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in", pathname, "as", basename)
                fname, arcname = self._get_codename(initname[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
                dirlist = os.listdir(pathname)
                # __init__.py has just been added; don't add it twice.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory", pathname)
                for filename in os.listdir(pathname):
                    path = os.path.join(pathname, filename)
                    ext = os.path.splitext(filename)[1]
                    if ext == ".py":
                        fname, arcname = self._get_codename(path[0:-3],
                                                            basename)
                        if self.debug:
                            print("Adding", arcname)
                        self.write(fname, arcname)
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        When `basename' is non-empty the archive name is prefixed with
        "basename/".  If compilation fails, the .py source file is
        returned for both values.
        """
        def _compile(file, optimize=-1):
            # Byte-compile `file'; report a failure and return False
            # instead of raising so the caller can fall back to the source.
            import py_compile
            if self.debug:
                print("Compiling", file)
            try:
                py_compile.compile(file, doraise=True, optimize=optimize)
            except py_compile.PyCompileError as err:
                print(err.msg)
                return False
            return True

        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        pycache_pyc = imp.cache_from_source(file_py, True)
        pycache_pyo = imp.cache_from_source(file_py, False)
        if self._optimize == -1:
            # legacy mode: use whatever file is present
            if (os.path.isfile(file_pyo) and
                    os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime):
                # Use .pyo file.
                arcname = fname = file_pyo
            elif (os.path.isfile(file_pyc) and
                    os.stat(file_pyc).st_mtime >= os.stat(file_py).st_mtime):
                # Use .pyc file.
                arcname = fname = file_pyc
            elif (os.path.isfile(pycache_pyc) and
                    os.stat(pycache_pyc).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyc file, but write it to the legacy
                # pyc file name in the archive.
                fname = pycache_pyc
                arcname = file_pyc
            elif (os.path.isfile(pycache_pyo) and
                    os.stat(pycache_pyo).st_mtime >= os.stat(file_py).st_mtime):
                # Use the __pycache__/*.pyo file, but write it to the legacy
                # pyo file name in the archive.
                fname = pycache_pyo
                arcname = file_pyo
            else:
                # Compile py into PEP 3147 pyc file.
                if _compile(file_py):
                    fname = (pycache_pyc if __debug__ else pycache_pyo)
                    arcname = (file_pyc if __debug__ else file_pyo)
                else:
                    fname = arcname = file_py
        else:
            # new mode: use given optimization level
            if self._optimize == 0:
                fname = pycache_pyc
                arcname = file_pyc
            else:
                fname = pycache_pyo
                arcname = file_pyo
            # Recompile when the cached file is missing or out of date.
            if not (os.path.isfile(fname) and
                    os.stat(fname).st_mtime >= os.stat(file_py).st_mtime):
                if not _compile(file_py, optimize=self._optimize):
                    fname = arcname = file_py
        archivename = os.path.split(arcname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
1487
1488
def main(args=None):
    """Command-line interface: list, test, extract or create a zipfile.

    `args' is the argument list without the program name; when None,
    sys.argv[1:] is used.  The first argument selects the mode (-l, -t,
    -e or -c).  On a missing/unknown mode or a wrong number of arguments
    the usage text is printed and the process exits with status 1.
    The archive is closed even when the selected operation fails.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                # An empty directory part means "current directory";
                # os.makedirs('') would fail.
                tgtdir = os.path.dirname(tgt)
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                if path.endswith('/'):
                    # Directory entry: the directory was created above and
                    # there is no file content to write.
                    continue
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else
            # (missing path, special file) is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1562
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()