comparison 2.00/zipfile31.py @ 29:a8cc383b787c

Clean up zipfiles and diff them to stock ones
author Oleg Oshmyan <chortos@inbox.lv>
date Wed, 24 Nov 2010 23:21:31 +0000
parents 2.00/zipfile3.py@3d535503161f
children
comparison
equal deleted inserted replaced
28:3d535503161f 29:a8cc383b787c
1 """
2 Read and write ZIP files.
3
4 XXX references to utf-8 need further investigation.
5 """
6 # Improved by Chortos-2 in 2010 (added bzip2 support)
7 import struct, os, time, sys, shutil
8 import binascii, io, stat
9
10 try:
11 import zlib # We may need its compression method
12 crc32 = zlib.crc32
13 except ImportError:
14 zlib = None
15 crc32 = binascii.crc32
16
17 try:
18 import bz2 # We may need its compression method
19 except ImportError:
20 bz2 = None
21
22 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
23 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", "ZIP_BZIP2" ]
24
class BadZipfile(Exception):
    """Raised by this module when a file cannot be processed as a ZIP archive."""
27
28
class LargeZipFile(Exception):
    """Raised while writing a zipfile that would require ZIP64 extensions
    when those extensions are disabled."""
34
# The exception raised by this module (alias of BadZipfile)
error = BadZipfile

# Size limits; ZipInfo.FileHeader falls back to the ZIP64 extension for
# sizes above ZIP64_LIMIT.
ZIP64_LIMIT = 0x7FFFFFFF
ZIP_FILECOUNT_LIMIT = 0x10000
ZIP_MAX_COMMENT = 0xFFFF

# Constants for ZIP file compression methods
# (other ZIP compression methods are not supported)
ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2 = 0, 8, 12
46
47 # Below are some formats and associated data for reading/writing headers using
48 # the struct module. The names and structures of headers/records are those used
49 # in the PKWARE description of the ZIP file format:
50 # http://www.pkware.com/documents/casestudies/APPNOTE.TXT
51 # (URL valid as of January 2008)
52
# "End of central directory" record: struct format, magic number and size
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in the unpacked record.  _ECD_COMMENT and
# _ECD_LOCATION are not part of the structure as defined in the spec; they
# are used internally by this module as a convenience.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)
71
# "Central directory" entry: struct format, magic number and size
# (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# Indexes of the fields in an unpacked central directory entry
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)
98
# "Local file header": struct format, magic number and size
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Indexes of the fields in an unpacked local file header
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
117
# "Zip64 end of central directory locator": struct format, magic number, size
structEndArchive64Locator, stringEndArchive64Locator = "<4sLQL", b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
122
# "Zip64 end of central directory" record: struct format, magic number, size
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Indexes of the fields in an unpacked Zip64 end of central directory record
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
139
def _check_zipfile(fp):
    """Return True when an "end of central directory" record is found in fp.

    An IOError raised while looking for the record counts as "not found".
    """
    try:
        found = bool(_EndRecData(fp))  # file has correct magic number
    except IOError:
        found = False
    return found
147
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a path or an object with a read() method.
    Returns False when the file cannot be opened or read (IOError).
    """
    try:
        if hasattr(filename, "read"):
            return _check_zipfile(fp=filename)
        with open(filename, "rb") as fp:
            return _check_zipfile(fp)
    except IOError:
        return False
163
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin is the open archive, offset is the (negative) position of the
    ordinary "end of central directory" record relative to the end of the
    file, and endrec is the list built by _EndRecData().  endrec is returned
    unchanged when no ZIP64 locator/record precedes that position; otherwise
    its first seven items are overwritten from the ZIP64 record.

    Raises BadZipfile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # The file is too small to contain a ZIP64 locator in front of the
        # end record (e.g. an empty archive), so there is nothing to merge.
        return endrec
    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
        dircount, dircount2, dirsize, diroffset = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
195
196
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    None is returned when no complete, self-consistent record is found."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length check keeps a short file that merely starts with the magic
    # number from reaching struct.unpack.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The magic number sits too close to the end of the file for a
            # whole record to follow it.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
253
254
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named filename, last modified at date_time.

        date_time is a (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file (local) header as bytes.

        When a size exceeds ZIP64_LIMIT a ZIP64 extra field is appended and
        extract_version/create_version are raised to at least 45.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Each version field is raised from its own previous value
            # (the original derived create_version from extract_version).
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits).

        ASCII is used when possible; otherwise the name is encoded as UTF-8
        and flag bit 0x800 is set in the returned flags.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply ZIP64 values found in the extra field to this entry.

        Fields of type 1 replace file_size, compress_size and header_offset
        when those hold their 0xffffffff placeholder.  Raises RuntimeError
        for a type 1 field of unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every field starts with a 4-byte (type, length) header; stop on
        # fewer bytes instead of letting struct.unpack fail on padding.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
393
394
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for value in range(256):
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= poly
            table.append(value)
        return table
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Initialise the three keys and mix in every byte of pwd."""
        self.key0, self.key1, self.key2 = 0x12345678, 0x23456789, 0x34567890
        for byte in pwd:
            self._UpdateKeys(byte)

    def _UpdateKeys(self, c):
        """Advance the three keys using the plain byte c."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (key1 * 0x08088405 + 1) & mask32
        self.key2 = self._crc32((self.key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 255
        plain = c ^ keystream
        self._UpdateKeys(plain)
        return plain
452
class ZipExtFile:
    """File-like object for reading an archive member.

    Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Prepare to read one member from fileobj.

        zipinfo supplies compress_type, compress_size, filename and, when it
        has one, the expected CRC.  decrypt is an optional callable mapping
        one encrypted byte (an int) to its decrypted value.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = b''
        self.readbuffer = b''
        self.linebuffer = b''
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed  = False
        self.mode    = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)
        elif self.compress_type == ZIP_BZIP2:
            self.dc = bz2.BZ2Decompressor()
            self.compreadsize = 900000

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal newline handling for readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark this object as closed (the underlying file is not touched)."""
        self.closed = True

    def _checkfornewline(self):
        """Return (index, length) of the first separator in linebuffer.

        Both values are -1 when the buffer holds no separator.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC; verify it when eof is true.

        Raises BadZipfile on a CRC mismatch.
        """
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read(self, size = None):
        """Read and return up to size bytes of the member's data.

        With size None or negative, everything still available is returned.
        For compressed members fewer bytes than requested (even none) may be
        returned before the end of the data is reached.
        """
        # act like file obj and return empty string if size is 0
        if size == 0:
            return b''
        # A negative size means "no limit"; without this the final slice
        # below would silently drop the last byte.
        if size is not None and size < 0:
            size = None

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            else:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data

        # handle contents of raw buffer
        if self.rawbuffer:
            newdata = self.rawbuffer
            self.rawbuffer = b''

            # decrypt new data if we were given an object to handle that
            if newdata and self.decrypter is not None:
                newdata = bytes(map(self.decrypter, newdata))

            # decompress newly read data if necessary
            if newdata and self.compress_type != ZIP_STORED:
                newdata = self.dc.decompress(newdata)
                # Only the zlib object exposes unconsumed_tail.  The buffer
                # must stay a bytes object so the next chunk read from the
                # file can be appended to it.
                if self.compress_type == ZIP_DEFLATED:
                    self.rawbuffer = self.dc.unconsumed_tail
                else:
                    self.rawbuffer = b''
                if self.eof and len(self.rawbuffer) == 0:
                    # we're out of raw bytes (both from the file and
                    # the local buffer); flush just to make sure the
                    # decompressor is done
                    try:
                        newdata += self.dc.flush()
                    except AttributeError:
                        # this decompressor has no flush() method
                        pass
                    # prevent decompressor from being used again
                    self.dc = None

            self._update_crc(newdata, eof=(
                self.compress_size == self.bytes_read and
                len(self.rawbuffer) == 0))
            self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
674
675
676 class ZipFile:
677 """ Class with methods to open, read, write, close, list zip files.
678
679 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
680
681 file: Either the path to the file, or a file-like object.
682 If it is a path, the file will be opened and closed by ZipFile.
683 mode: The mode can be either read "r", write "w" or append "a".
684 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
685 or ZIP_BZIP2 (requires bz2).
686 allowZip64: if True ZipFile will create files with ZIP64 extensions when
687 needed, otherwise it will raise an exception when this would
688 be necessary.
689
690 """
691
692 fp = None # Set here since __del__ checks it
693
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a"."""
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    # Refuse compression methods whose support module could not be imported
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                  "Compression requires the (missing) bz2 module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    key = mode.replace('b', '')[0]
    self.mode = key
    self.compression = compression  # Method of compression
    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.pwd = None
    self.comment = b''

    if not isinstance(file, str):
        # We were passed a file-like object
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        # It's a filename: open the file ourselves
        self._filePassed = 0
        self.filename = file
        open_modes = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, open_modes[mode])
        except IOError:
            if mode != 'a':
                raise
            # The file could not be opened for update; write a new one
            mode = key = 'w'
            self.fp = io.open(file, open_modes[mode])

    if key == 'r':
        self._GetContents()
    elif key == 'a':
        try:                        # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:          # file is not a zip file, just append
            self.fp.seek(0, 2)
    elif key != 'w':
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
757
def _GetContents(self):
    """Read the directory via _RealGetContents(); when that raises
    BadZipfile, close a file this object opened itself, then re-raise."""
    try:
        self._RealGetContents()
    except BadZipfile:
        opened_here = not self._filePassed
        if opened_here:
            self.fp.close()
            self.fp = None
        raise
768
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Appends one ZipInfo per central directory entry to self.filelist,
    registers it in self.NameToInfo, and sets self.comment and
    self.start_dir.  Raises BadZipfile when no "end of central directory"
    record is found, or when a directory entry has a bad magic number or
    is truncated.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # Report a cut-off entry as a bad archive rather than letting
            # struct.unpack fail on the short buffer.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[_CD_FLAG_BITS]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
            x.flag_bits, x.compress_type, t, d,
            x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
837
838
def namelist(self):
    """Return a list of file names in the archive."""
    return [entry.filename for entry in self.filelist]
845
def infolist(self):
    """Return the list of ZipInfo instances describing the archive's files.

    This is the archive's own list (self.filelist), not a copy.
    """
    entries = self.filelist
    return entries
850
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header line is written, then one line per archive entry with its
    name, modification time and size.  Output goes to `file', as with
    print().
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header, file=file)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
859
860 def testzip(self):
861 """Read all the files and check the CRC."""
862 chunk_size = 2 ** 20
863 for zinfo in self.filelist:
864 try:
865 # Read by chunks, to avoid an OverflowError or a
866 # MemoryError with very large embedded files.
867 f = self.open(zinfo.filename, "r")
868 while f.read(chunk_size): # Check CRC-32
869 pass
870 except BadZipfile:
871 return zinfo.filename
872
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no member of that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError('There is no item named %r in the archive' % name)
881
def setpassword(self, pwd):
    """Set default password for encrypted files.

    pwd must be a bytes object, or None to clear the default password.
    Raises TypeError for any other type; an explicit check is used because
    an assert would vanish when Python runs with -O.
    """
    if pwd is not None and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
886
def read(self, name, pwd=None):
    """Return the complete contents of the member 'name'.

    The member is opened with self.open(name, "r", pwd) and read to the end.
    """
    member = self.open(name, "r", pwd)
    return member.read()
890
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name is a member name or a ZipInfo object; mode is "r", "U" or "rU"
    ("U" turns on universal newlines); pwd is the password for an
    encrypted member and defaults to the one given to setpassword().

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a wrong password, and BadZipfile when the local file header
    is damaged or disagrees with the central directory.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")

    # Make sure we have an info object.  This is done before any file is
    # opened so that an unknown name cannot leak a file handle.
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    succeeded = False
    try:
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        # Decode the header's name the way the header itself declares it
        # (flag bit 0x800 means UTF-8, otherwise the historical cp437).
        # Comparing against a UTF-8 re-encoding would reject valid archives
        # whose names use non-ASCII cp437 characters.
        if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")
        if fname_str != zinfo.orig_filename:
            raise BadZipfile(
                  'File name in directory %r and header %r differ.'
                  % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            crypt_header = zef_file.read(12)
            h = list(map(zd, crypt_header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        succeeded = True
        return zef
    finally:
        # Do not leave a file we opened ourselves dangling after a failure
        if not succeeded and not self._filePassed:
            zef_file.close()
968
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a ZipInfo object. You can
       specify a different directory using `path'.

       Returns whatever self._extract_member() returns.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
982
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
       directory. `path' specifies a different directory to extract to.
       `members' is optional and must be a subset of the list returned
       by namelist().
    """
    targets = self.namelist() if members is None else members
    for entry in targets:
        self.extract(entry, path, pwd)
994
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical file or
    directory below the directory 'targetpath'.

    The member name is taken from the archive and is therefore treated
    as untrusted: a drive prefix, leading separators and any '.' or '..'
    components are removed, so the result always lands inside
    'targetpath'.  Missing parent directories are created.  A member
    whose name ends in '/' is created as a directory.  'pwd' is passed
    to self.open() for encrypted members.

    Returns the normalized path that was created.
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # Build the destination pathname, replacing forward slashes with the
    # platform specific separator.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Drop anything that could redirect the write outside of targetpath:
    # a drive/UNC prefix, a leading separator (which shows up as an empty
    # component) and "." / ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    unsafe_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(part for part in arcname.split(os.path.sep)
                               if part not in unsafe_parts)

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # Slicing (rather than indexing) keeps an empty member name from
    # raising IndexError here.
    if member.filename[-1:] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    # Close both ends even if the copy fails part-way (bad CRC, wrong
    # password, full disk, ...).  The object returned by self.open() is
    # only known to offer close(), so it is not used as a context manager.
    source = self.open(member, pwd=pwd)
    try:
        with open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)
    finally:
        source.close()

    return targetpath
1031
def _writecheck(self, zinfo):
    """Validate the archive state and 'zinfo' before a member is written.

    A duplicate member name is only reported (when self.debug is set),
    never rejected.  Raises RuntimeError if the archive is not open in
    mode "w" or "a", has already been closed, or if the requested
    compression method is unsupported or its module is missing.  Raises
    LargeZipFile if the member size or its header offset exceeds
    ZIP64_LIMIT while ZIP64 extensions are disabled.
    """
    if zinfo.filename in self.NameToInfo and self.debug:
        # Warning for duplicate names
        print("Duplicate name:", zinfo.filename)

    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")

    method = zinfo.compress_type
    if method == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if method == ZIP_BZIP2 and not bz2:
        raise RuntimeError(
            "Compression requires the (missing) bz2 module")
    if method not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2):
        raise RuntimeError("That compression method is not supported")

    # Both limits are only enforced when ZIP64 extensions are disabled.
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile(
            "Zipfile size would require ZIP64 extensions")
1057
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory 'filename' to the archive.

    The member is stored under 'arcname' (default: 'filename') after the
    name has been normalized and stripped of any drive prefix and
    leading path separators.  'compress_type' overrides the archive-wide
    self.compression for this member.  A directory is stored as an empty
    member whose name ends in '/'; its contents are not added.

    Raises RuntimeError if the archive has already been closed, plus
    whatever _writecheck() raises for the new member.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    file_stat = os.stat(filename)
    is_directory = stat.S_ISDIR(file_stat.st_mode)
    date_time = time.localtime(file_stat.st_mtime)[0:6]

    # Derive the member name stored in the archive.
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if is_directory:
        arcname += '/'

    # Create ZipInfo instance to store file information
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (file_stat.st_mode & 0xFFFF) << 16  # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = file_stat.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if is_directory:
        # A directory is just a header without any data.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as source:
        # The header goes out with zeroed CRC and sizes; the real values
        # are patched in once all of the data has been written.
        zinfo.CRC = 0
        zinfo.compress_size = 0
        zinfo.file_size = 0
        self.fp.write(zinfo.FileHeader())

        if zinfo.compress_type == ZIP_DEFLATED:
            compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                          zlib.DEFLATED, -15)
        elif zinfo.compress_type == ZIP_BZIP2:
            compressor = bz2.BZ2Compressor()
        else:
            compressor = None

        checksum = 0
        raw_total = 0
        packed_total = 0
        while True:
            chunk = source.read(1024 * 8)
            if not chunk:
                break
            raw_total += len(chunk)
            checksum = crc32(chunk, checksum) & 0xffffffff
            if compressor is not None:
                chunk = compressor.compress(chunk)
                packed_total += len(chunk)
            self.fp.write(chunk)

    if compressor is not None:
        # Emit whatever the compressor is still holding back.
        tail = compressor.flush()
        packed_total += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_total
    else:
        zinfo.compress_size = raw_total
    zinfo.CRC = checksum
    zinfo.file_size = raw_total

    # Seek backwards and write CRC and file sizes
    resume_at = self.fp.tell()      # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(resume_at, 0)

    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1140
def writestr(self, zinfo_or_arcname, data):
    """Write in-memory contents into the archive as one member.

    'data' may be either a 'str' or a 'bytes' instance; a 'str' is
    encoded as UTF-8 first.  'zinfo_or_arcname' is either a ZipInfo
    instance, which is used (and updated) as given, or the name of the
    file in the archive, for which a fresh ZipInfo is created with the
    current local time, the archive's default compression and
    permission bits 0o600.

    Raises RuntimeError if the archive has already been closed, plus
    whatever _writecheck() raises for the member.
    """
    if isinstance(data, str):
        data = data.encode("utf-8")

    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum

    # Pick the compressor for the member, if any.
    if zinfo.compress_type == ZIP_DEFLATED:
        packer = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                  zlib.DEFLATED, -15)
    elif zinfo.compress_type == ZIP_BZIP2:
        packer = bz2.BZ2Compressor()
    else:
        packer = None

    if packer is not None:
        data = packer.compress(data) + packer.flush()
        zinfo.compress_size = len(data)     # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size

    # The offset is read once more immediately before the header goes out.
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))

    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1187
def __del__(self):
    """Finalizer: run close() for archives the caller never closed."""
    self.close()
1191
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    If the archive was opened in mode "w" or "a" and has been modified,
    the central directory, the ZIP64 end-of-archive records (only when
    the entry count, the directory size or the directory offset needs
    them) and the end-of-archive record followed by the archive comment
    are written first.  A comment longer than ZIP_MAX_COMMENT bytes is
    truncated; with self.debug > 0 a message about that is printed.

    The underlying file object is closed only if this object opened it
    itself (self._filePassed is false).  self.fp is reset to None in
    every case, including when writing the ending records raises, so
    the file is never leaked and a later close() is a no-op.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                # Pack the timestamp into the two 16-bit DOS fields
                # (years count from 1980, seconds are stored halved).
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)

                # Values that do not fit the 32-bit header fields are
                # moved into a ZIP64 extra field and replaced by the
                # 0xffffffff marker.
                extra = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version,
                        zinfo.reserved, flag_bits, zinfo.compress_type,
                        dostime, dosdate, zinfo.CRC, compress_size,
                        file_size, len(filename), len(extra_data),
                        len(zinfo.comment), 0, zinfo.internal_attr,
                        zinfo.external_attr, header_offset)
                except DeprecationWarning:
                    # Diagnostic aid: dump the values handed to
                    # struct.pack before letting the error propagate.
                    print((structCentralDir, stringCentralDir,
                           create_version, zinfo.create_system,
                           extract_version, zinfo.reserved,
                           zinfo.flag_bits, zinfo.compress_type,
                           dostime, dosdate, zinfo.CRC, compress_size,
                           file_size, len(zinfo.filename),
                           len(extra_data), len(zinfo.comment),
                           0, zinfo.internal_attr, zinfo.external_attr,
                           header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic record below only has room for the
                # clamped values.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    print('Archive comment is too long; '
                          'truncating to %d bytes' % ZIP_MAX_COMMENT)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset,
                                 len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Mark the archive closed before releasing the file so that a
        # failing close() of the file cannot leave a half-closed object.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1300
1301
class PyZipFile(ZipFile):
    """ZipFile variant that stores compiled Python modules and packages."""

    def writepy(self, pathname, basename=""):
        """Add the compiled module(s) found at "pathname" to the archive.

        If pathname is a package directory (it contains __init__.py),
        the package and all of its sub-packages are searched
        recursively for *.py files and the modules are stored below
        "basename/<package name>".  If pathname is a plain directory,
        only the *.py files directly inside it are added, under
        "basename".  Otherwise pathname must be a Python *.py file and
        that single module is added; any other file name raises
        RuntimeError.  Added modules are always module.pyo or
        module.pyc; module.py is compiled into module.pyc first if
        necessary.
        """
        name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for entry in os.listdir(pathname):
                if os.path.splitext(entry)[1] != ".py":
                    continue
                path = os.path.join(pathname, entry)
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
            return

        # This is a package directory, add it
        basename = "%s/%s" % (basename, name) if basename else name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        # __init__.py has just been stored; handle the remaining entries.
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for entry in entries:
            path = os.path.join(pathname, entry)
            if os.path.isdir(path):
                # Only sub-directories that are packages themselves
                # are followed.
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    self.writepy(path, basename)  # Recursive call
            elif os.path.splitext(entry)[1] == ".py":
                fname, arcname = self._get_codename(path[0:-3], basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module path.

        "pathname" is the path of a module without its extension.  An
        existing .pyo file that is at least as new as the .py source is
        preferred; otherwise the .pyc file is used, after compiling the
        source if the .pyc is missing or older than the source.  A
        compile error is printed and not raised.  The archive name is
        the chosen file's base name, prefixed with "basename/" when
        basename is non-empty.  For example, given /python/lib/string
        and an empty basename, return
        (/python/lib/string.pyc, string.pyc).
        """
        source_file = pathname + ".py"
        compiled_file = pathname + ".pyc"
        optimized_file = pathname + ".pyo"

        if (os.path.isfile(optimized_file) and
                os.stat(optimized_file).st_mtime
                >= os.stat(source_file).st_mtime):
            chosen = optimized_file     # Use .pyo file
        else:
            if (not os.path.isfile(compiled_file) or
                    os.stat(compiled_file).st_mtime
                    < os.stat(source_file).st_mtime):
                import py_compile
                if self.debug:
                    print("Compiling", source_file)
                try:
                    py_compile.compile(source_file, compiled_file,
                                       None, True)
                except py_compile.PyCompileError as err:
                    print(err.msg)
            chosen = compiled_file

        archivename = os.path.split(chosen)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (chosen, archivename)
1399
1400
def main(args=None):
    """Command-line interface: list, test, extract or create a ZIP file.

    'args' is the argument list without the program name and defaults
    to sys.argv[1:].  The first argument selects the action:

        -l zipfile.zip           print a listing of the archive
        -t zipfile.zip           test the archive, report the first
                                 corrupted member
        -e zipfile.zip target    extract everything into 'target'
        -c zipfile.zip src ...   create the archive from files and
                                 directory trees

    An unknown action or a wrong number of arguments prints the usage
    text and exits with status 1.  The archive is closed in every
    action, even when the action fails.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            # extractall() creates missing directories and copes with
            # directory entries (names ending in '/'), which cannot be
            # opened as files.
            zf.extractall(args[2])
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory tree; anything
            # else is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm),
                             os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()


if __name__ == "__main__":
    main()