comparison zipfiles/zipfile31.py @ 153:45d4a9dc707b

Moved everything to do with zipfile to a new folder named zipfiles
author Oleg Oshmyan <chortos@inbox.lv>
date Fri, 03 Jun 2011 20:22:08 +0100
parents zipfile31.py@4ea7133ac25c
children b993d9257400
comparison
equal deleted inserted replaced
152:7951219d9866 153:45d4a9dc707b
1 """
2 Read and write ZIP files.
3
4 XXX references to utf-8 need further investigation.
5 """
6 # Improved by Chortos-2 in 2010 (added bzip2 support)
7 import struct, os, time, sys, shutil
8 import binascii, io, stat
9
10 try:
11 import zlib # We may need its compression method
12 crc32 = zlib.crc32
13 except ImportError:
14 zlib = None
15 crc32 = binascii.crc32
16
17 try:
18 import bz2 # We may need its compression method
19 except ImportError:
20 bz2 = None
21
22 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
23 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", "ZIP_BZIP2" ]
24
class BadZipfile(Exception):
    """Raised by this module when an archive cannot be parsed as a ZIP file."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


# Alias kept for callers that refer to the module's exception as ``error``.
error = BadZipfile
36
# Numeric limits used by this module (largest signed 32-bit value, 2**16
# and 2**16 - 1 respectively).
ZIP64_LIMIT = 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = 0x10000
ZIP_MAX_COMMENT = 0xFFFF

# constants for Zip file compression methods
ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2 = 0, 8, 12
# Other ZIP compression methods not supported
46
47 # Below are some formats and associated data for reading/writing headers using
48 # the struct module. The names and structures of headers/records are those used
49 # in the PKWARE description of the ZIP file format:
50 # http://www.pkware.com/documents/casestudies/APPNOTE.TXT
51 # (URL valid as of January 2008)
52
# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the unpacked fields inside an end-of-central-directory list.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE) = range(8)
# These last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience
_ECD_COMMENT, _ECD_LOCATION = 8, 9
71
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)
98
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the unpacked fields inside a local file header tuple.
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
117
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the unpacked fields inside a ZIP64 end-of-directory tuple.
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
139
def _check_zipfile(fp):
    """Return True when an "end of central directory" record is found in fp.

    An IOError raised while probing the file is treated as "not a ZIP file".
    """
    try:
        found = bool(_EndRecData(fp))   # truthy record => correct magic number
    except IOError:
        found = False
    return found
147
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too (anything
    with a ``read`` attribute is probed directly instead of being opened).
    IOError while opening or probing yields False.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except IOError:
        pass
    return verdict
163
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is the open archive, offset is the (negative) position of the
    classic end-of-central-directory record relative to the end of the file,
    and endrec is the list built from that record.  The list is returned
    unchanged when no ZIP64 locator/record is present (or when the file is
    too short to hold one); otherwise its first seven items are overwritten
    with the ZIP64 values.

    Raises BadZipfile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read: there is no room for a locator, and unpacking a
        # truncated buffer would raise struct.error.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Same guard as above for the ZIP64 record itself.
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
201
202
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    None is returned when no complete, self-consistent record is found."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length test guards against file objects that clamp the seek on
    # short files instead of raising: unpacking fewer bytes than the
    # structure needs would raise struct.error.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # whole record to follow it: the file is truncated/corrupt.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
259
260
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for filename with the given modification time.

        filename is kept verbatim in orig_filename; the normalized form
        (cut at the first NUL, os.sep replaced by "/") goes to filename.
        date_time is a (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a bytes object.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When either size exceeds
        ZIP64_LIMIT a ZIP64 extra record is appended and extract_version /
        create_version are raised to at least 45 on this instance.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        ASCII names are returned with flag_bits untouched; any other name
        is encoded as UTF-8 and bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) found in self.extra, if any.

        Fields currently holding the 0xffffffff placeholder (file_size,
        compress_size, header_offset, in that order) are replaced by the
        64-bit values stored in the record.  Raises RuntimeError when the
        record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) header; stop as
        # soon as fewer bytes than that remain so that trailing padding or
        # a truncated field cannot make struct.unpack fail.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
399
400
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            crc = byte
            for _ in range(8):
                low_bit = crc & 1
                crc = (crc >> 1) & 0x7FFFFFFF
                if low_bit:
                    crc ^= poly
            table.append(crc)
        return table
    # Built once, while the class body executes, and shared by all instances.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[(crc ^ ch) & 0xff]

    def __init__(self, pwd):
        """Seed the three keys and mix in every byte of pwd."""
        self.key0, self.key1, self.key2 = 0x12345678, 0x23456789, 0x34567890
        for p in pwd:
            self._UpdateKeys(p)

    def _UpdateKeys(self, c):
        """Advance the three keys with the plaintext byte c."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (key1 * 0x08088405 + 1) & mask32
        self.key2 = self._crc32((self.key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        stream_byte = ((k * (k ^ 1)) >> 8) & 0xFF
        c = c ^ stream_byte
        self._UpdateKeys(c)
        return c
458
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj, already positioned at the member's data.

        zipinfo supplies compress_type, compress_size, filename and
        (optionally) CRC; decrypt, when given, is called on every raw byte
        before decompression.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = b''
        self.readbuffer = b''
        self.linebuffer = b''
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed  = False
        self.mode    = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)
        elif self.compress_type == ZIP_BZIP2:
            self.dc = bz2.BZ2Decompressor()
            self.compreadsize = 900000

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable recognition of \\r and \\r\\n as line ends."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object closed (the underlying file is left untouched)."""
        self.closed = True

    def _checkfornewline(self):
        """Return (position, separator length) of the first line break in
        the line buffer, or (-1, -1) when there is none."""
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC; at eof compare it with the
        expected value and raise BadZipfile on mismatch."""
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read(self, size = None):
        """Return up to size decoded bytes of the member.

        size of None, or any negative size, reads everything that is left;
        size 0 returns b''.  Raises BadZipfile (through _update_crc) when
        the last byte has been consumed and the CRC does not match.
        """
        # act like file obj and return empty string if size is 0
        if size == 0:
            return b''

        # A negative size means "no limit".  Normalising it here keeps the
        # final slice from silently dropping the last byte (buffer[:-1]).
        if size is not None and size < 0:
            size = None

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            else:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = b''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = bytes(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type != ZIP_STORED:
                    newdata = self.dc.decompress(newdata)
                    if self.compress_type == ZIP_DEFLATED:
                        self.rawbuffer = self.dc.unconsumed_tail
                    else:
                        # The bz2 decompressor keeps no unconsumed tail.
                        # The buffer must stay a bytes object: a str here
                        # makes the next "rawbuffer += data" raise TypeError.
                        self.rawbuffer = b''
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        try:
                            newdata += self.dc.flush()
                        except AttributeError:
                            # decompressor object without a flush() method
                            pass
                        # prevent decompressor from being used again
                        self.dc = None

                self._update_crc(newdata, eof=(
                    self.compress_size == self.bytes_read and
                    len(self.rawbuffer) == 0))
                self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
680
681
682 class ZipFile:
683 """ Class with methods to open, read, write, close, list zip files.
684
685 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
686
687 file: Either the path to the file, or a file-like object.
688 If it is a path, the file will be opened and closed by ZipFile.
689 mode: The mode can be either read "r", write "w" or append "a".
690 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
691 or ZIP_BZIP2 (requires bz2).
692 allowZip64: if True ZipFile will create files with ZIP64 extensions when
693 needed, otherwise it will raise an exception when this would
694 be necessary.
695
696 """
697
698 fp = None # Set here since __del__ checks it
699
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file is either a path (opened and owned by this object) or an already
    open file-like object.  Raises RuntimeError for an unknown mode, an
    unsupported compression method, or a method whose module is missing.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    # Refuse methods that are unknown or whose module could not be imported.
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                  "Compression requires the (missing) bz2 module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self.comment = b''

    # Check if we were passed a file-like object
    if isinstance(file, str):
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending to a file that cannot be opened for update:
            # fall back to creating it.
            mode = key = 'w'
            self.fp = io.open(file, modeDict[mode])
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    if key == 'r':
        self._GetContents()
    elif key == 'w':
        # set the modified flag so central directory gets written
        # even if no files are added to the archive
        self._didModify = True
    elif key == 'a':
        try:
            # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:
            # file is not a zip file, just append
            self.fp.seek(0, 2)

            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            # NOTE(review): nesting inside this handler is reconstructed
            # (the source view lost its indentation) - confirm.
            self._didModify = True
    else:
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
771
def _GetContents(self):
    """Read the directory via _RealGetContents().

    On BadZipfile the file is closed first (only when this object opened
    it itself) and the exception is re-raised.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        owns_file = not self._filePassed
        if owns_file:
            self.fp.close()
            self.fp = None
        raise
782
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.filelist / self.NameToInfo with one ZipInfo per central
    directory entry and sets self.comment and self.start_dir.

    Raises BadZipfile when no end-of-central-directory record is found or
    when the central directory is misplaced, truncated or has a bad
    signature.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipfile("File is not a zip file")
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    if self.start_dir < 0:
        # Inconsistent sizes/offsets in the end record; seeking to a
        # negative absolute position would fail with an unrelated error.
        raise BadZipfile("Bad offset for central directory")
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # Right signature but not enough bytes behind it: report the
            # damage as BadZipfile instead of letting struct.unpack fail.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[_CD_FLAG_BITS]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
            x.flag_bits, x.compress_type, t, d,
            x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
854
855
def namelist(self):
    """Return a new list with the file name of every archive member."""
    return [entry.filename for entry in self.filelist]
862
def infolist(self):
    """Return the archive's list of ZipInfo instances.

    The internal list object itself is returned, not a copy.
    """
    entries = self.filelist
    return entries
867
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is followed by one line per member (name, modification
    time, uncompressed size); output goes to file, as understood by print().
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        row = "%-46s %s %12d" % (entry.filename, stamp, entry.file_size)
        print(row, file=file)
876
877 def testzip(self):
878 """Read all the files and check the CRC."""
879 chunk_size = 2 ** 20
880 for zinfo in self.filelist:
881 try:
882 # Read by chunks, to avoid an OverflowError or a
883 # MemoryError with very large embedded files.
884 f = self.open(zinfo.filename, "r")
885 while f.read(chunk_size): # Check CRC-32
886 pass
887 except BadZipfile:
888 return zinfo.filename
889
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no member called name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
898
def setpassword(self, pwd):
    """Set default password for encrypted files.

    pwd must be a bytes object.  Raises TypeError otherwise; an explicit
    check is used because an assert is stripped under ``python -O``, and a
    value of the wrong type would then only fail later during decryption.
    """
    if not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
903
def read(self, name, pwd=None):
    """Return the complete contents of member name as bytes.

    pwd is forwarded to open() together with mode "r".
    """
    member = self.open(name, "r", pwd)
    return member.read()
907
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name is a member name or a ZipInfo instance; mode is "r", "U" or "rU"
    ("U" enables universal newlines on the returned object); pwd is the
    password for encrypted members and defaults to the one given to
    setpassword().

    Raises RuntimeError for a bad mode, a closed archive, a missing or
    wrong password; KeyError for an unknown member name; BadZipfile when
    the local file header is damaged or disagrees with the directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            raise BadZipfile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        # Decode the stored name with the encoding announced by the local
        # header itself.  Comparing against a UTF-8 re-encoding of the
        # directory name rejects every cp437 name with non-ASCII characters.
        if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipfile(
                  'File name in directory %r and header %r differ.'
                  % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            enc_header = zef_file.read(12)
            h = list(map(zd, enc_header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)
    except Exception:
        # Do not leak the handle opened above when the member turns out to
        # be unusable; a caller-supplied file object is left alone.
        if not self._filePassed:
            zef_file.close()
        raise

    # build and return a ZipExtFile
    if zd is None:
        zef = ZipExtFile(zef_file, zinfo)
    else:
        zef = ZipExtFile(zef_file, zinfo, zd)

    # set universal newlines on ZipExtFile if necessary
    if "U" in mode:
        zef.set_univ_newlines(True)
    return zef
985
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a ZipInfo object. You can
       specify a different directory using `path'.

       Returns whatever _extract_member() returns for the member.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
999
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members from the archive, one after another.

    `path' is the directory to extract to; it is passed to extract()
    as is, so None means the current working directory.  `members'
    selects what to extract and defaults to everything returned by
    namelist().  `pwd' is forwarded to extract() for every member.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
1011
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical file below
    the directory 'targetpath' and return the path that was written.

    The member name is made relative before it is joined to
    'targetpath': a drive prefix, leading separators, empty components,
    "." and ".." are all dropped, so a crafted archive name cannot place
    a file outside 'targetpath'.  Members whose name ends in '/' are
    created as directories.  'pwd' is passed on to open().
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # Archive names always use forward slashes; convert them to the
    # platform separator and throw away every component that could make
    # the result absolute or climb out of the target directory.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    arcname = os.path.splitdrive(arcname)[1]
    unsafe_parts = ('', os.path.curdir, os.path.pardir)
    parts = [part for part in arcname.split(os.path.sep)
             if part not in unsafe_parts]

    targetpath = os.path.normpath(os.path.join(targetpath, *parts))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # Slicing (rather than indexing) keeps an empty name from raising.
    if member.filename[-1:] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    source = self.open(member, pwd=pwd)
    try:
        # Both handles are released even when opening the target or the
        # copy itself fails.
        with open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)
    finally:
        source.close()

    return targetpath
1048
def _writecheck(self, zinfo):
    """Validate the archive state and 'zinfo' before an entry is written.

    Prints a notice for a duplicate member name when debugging is on.
    Raises RuntimeError if the archive is not open for writing, is
    already closed, or the entry's compression method is unsupported
    or needs a module that could not be imported.  Raises LargeZipFile
    if the entry's size or header offset exceeds ZIP64_LIMIT while
    ZIP64 extensions are not allowed.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Duplicate names are only reported, never rejected.
        print("Duplicate name:", zinfo.filename)

    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")

    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type == ZIP_BZIP2 and not bz2:
        raise RuntimeError(
            "Compression requires the (missing) bz2 module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2):
        raise RuntimeError("That compression method is not supported")

    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
1074
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory 'filename' to the archive.

    'arcname' is the name to store it under and defaults to 'filename';
    any drive prefix and leading separators are removed from it.
    'compress_type' overrides the archive's default compression for
    this one entry.  A directory is stored as an empty entry whose name
    ends in '/'.

    Raises RuntimeError if the archive has already been closed, plus
    whatever _writecheck() raises for the new entry.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    is_directory = stat.S_ISDIR(st.st_mode)
    date_time = time.localtime(st.st_mtime)[0:6]

    # Work out the name the entry gets inside the archive.
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if is_directory:
        arcname += '/'

    # Create ZipInfo instance to store file information
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if is_directory:
        # A directory has no data: register it and write only a header.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as src:
        # The header goes out with zeroed CRC and sizes; the real
        # values are patched in once the data has been streamed.
        zinfo.CRC = running_crc = 0
        zinfo.compress_size = packed_total = 0
        zinfo.file_size = raw_total = 0
        self.fp.write(zinfo.FileHeader())

        if zinfo.compress_type == ZIP_DEFLATED:
            compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                          zlib.DEFLATED, -15)
        elif zinfo.compress_type == ZIP_BZIP2:
            compressor = bz2.BZ2Compressor()
        else:
            compressor = None

        while True:
            chunk = src.read(1024 * 8)
            if not chunk:
                break
            raw_total += len(chunk)
            running_crc = crc32(chunk, running_crc) & 0xffffffff
            if compressor:
                chunk = compressor.compress(chunk)
                packed_total += len(chunk)
            self.fp.write(chunk)

    if compressor:
        # Emit whatever the compressor is still holding back.
        tail = compressor.flush()
        packed_total += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_total
    else:
        zinfo.compress_size = raw_total
    zinfo.CRC = running_crc
    zinfo.file_size = raw_total

    # Seek backwards and write CRC and file sizes
    resume_at = self.fp.tell()      # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(resume_at, 0)

    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1157
def writestr(self, zinfo_or_arcname, data):
    """Store 'data' in the archive as one member.

    'data' may be a 'bytes' or a 'str' instance; a 'str' is encoded
    as UTF-8 before it is stored.  'zinfo_or_arcname' is either a
    ZipInfo instance describing the member or just its name; for a
    plain name a ZipInfo is created with the current local time, the
    archive's default compression and external attributes 0o600 << 16.

    Raises RuntimeError if the archive has already been closed, plus
    whatever _writecheck() raises for the entry.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum

    # Pick a one-shot compressor for the member, if it needs one.
    if zinfo.compress_type == ZIP_DEFLATED:
        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                      zlib.DEFLATED, -15)
    elif zinfo.compress_type == ZIP_BZIP2:
        compressor = bz2.BZ2Compressor()
    else:
        compressor = None
    if compressor is not None:
        data = compressor.compress(data) + compressor.flush()
    # For stored members 'data' is untouched, so this equals file_size.
    zinfo.compress_size = len(data)

    # Nothing has been written since the offset was first recorded;
    # it is simply read again right before the header goes out.
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))

    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1204
def __del__(self):
    """Call the "close()" method in case the user forgot.

    close() returns immediately when self.fp is already None, so an
    archive that was closed explicitly is not closed a second time.
    """
    self.close()
1208
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing if the archive is already closed.  When the archive
    was modified, the central directory, the ZIP64 end-of-archive
    records (if the entry count, directory size or directory offset
    require them) and the end-of-archive record with the archive
    comment are written first.  An over-long comment is truncated to
    ZIP_MAX_COMMENT bytes, with a notice printed when debugging is on.

    The file object is always released, even if writing the ending
    records fails; it is only closed if this object opened it itself.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values that do not fit the 32-bit fields are moved
                # into a ZIP64 extra field and replaced by 0xffffffff.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values that could not be packed, then
                    # let the warning-turned-error propagate.
                    print((structCentralDir, stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    print('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Drop the reference first so that a failure above (or in
        # fp.close() itself) cannot leave a half-closed archive that a
        # later close() or __del__ would try to finalize again.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1317
1318
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add the compiled module(s) found at "pathname" to the archive.

        Three cases are handled:

        * "pathname" is a package directory (it contains __init__.py):
          its __init__ module, every *.py file in it and, recursively,
          every sub-directory that is itself a package are added under
          the package name, prefixed with "basename" if one is given.
        * "pathname" is any other directory: every *.py file directly
          inside it is added.
        * otherwise "pathname" must name a *.py file, which is added;
          RuntimeError is raised for any other name.

        What gets stored is always the module.pyo or module.pyc file
        chosen by _get_codename(), which compiles the source when
        necessary.
        """
        parent, name = os.path.split(pathname)

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for entry in os.listdir(pathname):
                entry_path = os.path.join(pathname, entry)
                stem, suffix = os.path.splitext(entry)
                if suffix == ".py":
                    fname, arcname = self._get_codename(entry_path[0:-3],
                                                        basename)
                    if self.debug:
                        print("Adding", arcname)
                    self.write(fname, arcname)
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        # __init__.py has just been handled; leave it out of the scan.
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for entry in entries:
            entry_path = os.path.join(pathname, entry)
            stem, suffix = os.path.splitext(entry)
            if os.path.isdir(entry_path):
                if os.path.isfile(os.path.join(entry_path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(entry_path, basename)  # Recursive call
            elif suffix == ".py":
                fname, arcname = self._get_codename(entry_path[0:-3],
                                                    basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module at "pathname".

        "pathname" is the module path without its extension.  An
        existing .pyo file that is at least as new as the .py source is
        preferred.  Otherwise the .pyc file is used, after compiling the
        source when the .pyc is missing or older than the source; a
        compile error is printed, not raised.  The archive name is the
        chosen file's base name, prefixed with "basename/" if "basename"
        is non-empty.  For example, /python/lib/string may yield
        (/python/lib/string.pyc, string.pyc).
        """
        source = pathname + ".py"
        compiled = pathname + ".pyc"
        optimized = pathname + ".pyo"

        if (os.path.isfile(optimized) and
                os.stat(optimized).st_mtime >= os.stat(source).st_mtime):
            fname = optimized       # Use .pyo file
        else:
            out_of_date = (not os.path.isfile(compiled) or
                           os.stat(compiled).st_mtime <
                           os.stat(source).st_mtime)
            if out_of_date:
                import py_compile
                if self.debug:
                    print("Compiling", source)
                try:
                    py_compile.compile(source, compiled, None, True)
                except py_compile.PyCompileError as err:
                    print(err.msg)
            fname = compiled

        archivename = os.path.basename(fname)
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
1416
1417
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    'args' is the argument list and defaults to sys.argv[1:].  The
    first argument selects the action (-l, -t, -e or -c); the usage
    text is printed and the process exits with status 1 when the
    action is unknown or has the wrong number of arguments.

    The archive is closed on every path, including when an action
    fails part-way through.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # A directory entry has no data to write; opening
                    # its path as a file would fail.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' when extracting into the current
                # directory; os.makedirs('') would raise.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                with open(tgt, 'wb') as fp:
                    fp.write(zf.read(path))
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else
            # is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1491
# Run the command-line interface only when this file is executed as a
# script; main() then takes its arguments from sys.argv[1:].
if __name__ == "__main__":
    main()