comparison 2.00/zipfile3.py @ 21:ec6f1a132109

A pretty usable version. Test groups and testconfs in non-ZIP archives or ZIP archives with comments are not yet supported.
author Oleg Oshmyan <chortos@inbox.lv>
date Fri, 06 Aug 2010 15:39:29 +0000
parents
children 3d535503161f
comparison
equal deleted inserted replaced
20:5bfa23cd638d 21:ec6f1a132109
1 """
2 Read and write ZIP files.
3
4 XXX references to utf-8 need further investigation.
5 """
6 # Improved by Chortos-2 in 2010 (added bzip2 support)
7 import struct, os, time, sys, shutil
8 import binascii, io, stat
9
10 try:
11 import zlib # We may need its compression method
12 crc32 = zlib.crc32
13 except ImportError:
14 zlib = None
15 crc32 = binascii.crc32
16
17 try:
18 import bz2 # We may need its compression method
19 except ImportError:
20 bz2 = None
21
22 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
23 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", "ZIP_BZIP2" ]
24
class BadZipfile(Exception):
    """Raised by this module when an archive is malformed or not a ZIP file."""


class LargeZipFile(Exception):
    """Signal that ZIP64 would be needed but is switched off.

    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


# Alias kept for callers that catch the module-level ``error`` name.
error = BadZipfile      # The exception raised by this module
36
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
ZIP_BZIP2 = 12
# Other ZIP compression methods not supported

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)
#
# Each group below gives the struct format, the record's magic number, the
# packed size, and then the positions of the fields in the unpacked tuple.

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = b"<4s4H2LH"
stringEndArchive = b"PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

(
    _ECD_SIGNATURE,
    _ECD_DISK_NUMBER,
    _ECD_DISK_START,
    _ECD_ENTRIES_THIS_DISK,
    _ECD_ENTRIES_TOTAL,
    _ECD_SIZE,
    _ECD_OFFSET,
    _ECD_COMMENT_SIZE,
    # These last two indices are not part of the structure as defined in the
    # spec, but they are used internally by this module as a convenience
    _ECD_COMMENT,
    _ECD_LOCATION,
) = range(10)

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = b"PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(
    _CD_SIGNATURE,
    _CD_CREATE_VERSION,
    _CD_CREATE_SYSTEM,
    _CD_EXTRACT_VERSION,
    _CD_EXTRACT_SYSTEM,
    _CD_FLAG_BITS,
    _CD_COMPRESS_TYPE,
    _CD_TIME,
    _CD_DATE,
    _CD_CRC,
    _CD_COMPRESSED_SIZE,
    _CD_UNCOMPRESSED_SIZE,
    _CD_FILENAME_LENGTH,
    _CD_EXTRA_FIELD_LENGTH,
    _CD_COMMENT_LENGTH,
    _CD_DISK_NUMBER_START,
    _CD_INTERNAL_FILE_ATTRIBUTES,
    _CD_EXTERNAL_FILE_ATTRIBUTES,
    _CD_LOCAL_HEADER_OFFSET,
) = range(19)

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = b"PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

(
    _FH_SIGNATURE,
    _FH_EXTRACT_VERSION,
    _FH_EXTRACT_SYSTEM,
    _FH_GENERAL_PURPOSE_FLAG_BITS,
    _FH_COMPRESSION_METHOD,
    _FH_LAST_MOD_TIME,
    _FH_LAST_MOD_DATE,
    _FH_CRC,
    _FH_COMPRESSED_SIZE,
    _FH_UNCOMPRESSED_SIZE,
    _FH_FILENAME_LENGTH,
    _FH_EXTRA_FIELD_LENGTH,
) = range(12)

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = b"PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = b"PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(
    _CD64_SIGNATURE,
    _CD64_DIRECTORY_RECSIZE,
    _CD64_CREATE_VERSION,
    _CD64_EXTRACT_VERSION,
    _CD64_DISK_NUMBER,
    _CD64_DISK_NUMBER_START,
    _CD64_NUMBER_ENTRIES_THIS_DISK,
    _CD64_NUMBER_ENTRIES_TOTAL,
    _CD64_DIRECTORY_SIZE,
    _CD64_OFFSET_START_CENTDIR,
) = range(10)
139
def _check_zipfile(fp):
    """Return True when an end-of-central-directory record is found in fp.

    An IOError raised while probing the file counts as "not a ZIP file".
    """
    try:
        endrec = _EndRecData(fp)
    except IOError:
        return False
    # A truthy record means the file has the correct magic number.
    return True if endrec else False
147
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too: anything
    with a ``read`` attribute is probed directly, anything else is opened
    as a path in binary mode.  IOError (for example a missing file) yields
    False rather than propagating.
    """
    verdict = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                verdict = _check_zipfile(stream)
        else:
            verdict = _check_zipfile(fp=filename)
    except IOError:
        pass
    return verdict
163
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- seekable binary file object for the archive.
    offset -- position of the regular end-of-central-directory record,
              expressed relative to the end of the file (so it is negative).
    endrec -- list built by _EndRecData(); updated in place and returned.

    If no ZIP64 locator/record precedes the regular record (including the
    case where the file is simply too short to contain one) endrec is
    returned unchanged.  Raises BadZipfile for multi-disk archives.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # Seeking before the start of the file fails on real files: the
        # archive is too small to hold a ZIP64 locator, so it has none.
        return endrec
    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # Short read (e.g. a file-like object that clamps the seek).
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
195
196
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight fields of the ZIP "End of central dir"
    record, followed by the archive comment and, as the tenth item, the
    file seek offset of this record.  The list is passed through
    _EndRecData64() so ZIP64 values replace the 32-bit ones when present.

    None is returned when no valid record can be located, including when
    the candidate record is truncated.
    """

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length test protects struct.unpack() from file objects that clamp
    # an out-of-range seek and therefore hand back fewer bytes.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == b"\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append(b"")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to EOF for a whole record.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
253
254
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry for *filename* with default header values.

        filename  -- member name; kept verbatim in orig_filename and
                     normalized (NUL-truncated, forward slashes) in filename.
        date_time -- (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = b""              # Comment for each file
        self.extra = b""                # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file (local) header as bytes.

        The result is the packed fixed-size header followed by the encoded
        file name and the extra field.  When a size exceeds ZIP64_LIMIT a
        ZIP64 extra record is appended and extract_version/create_version
        are raised to at least 45 on this object.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        ASCII names are encoded as-is; anything else is encoded as UTF-8
        and the returned flags have bit 0x800 set.
        """
        try:
            return self.filename.encode('ascii'), self.flag_bits
        except UnicodeEncodeError:
            return self.filename.encode('utf-8'), self.flag_bits | 0x800

    def _decodeExtra(self):
        """Apply ZIP64 values found in the extra field to this entry.

        Walks the (type, length, payload) records in self.extra.  For a
        type-1 (ZIP64) record, each of file_size, compress_size and
        header_offset that currently holds the 32-bit placeholder is
        replaced, in that order, by the next 64-bit value of the record.
        Raises RuntimeError for a ZIP64 record of unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (type, length) prefix; stop when
        # fewer bytes remain so trailing padding cannot break unpack().
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
393
394
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        polynomial = 0xedb88320
        entries = []
        for value in range(256):
            # Eight shift steps per byte; fold in the polynomial whenever
            # the bit shifted out was set.
            for _ in range(8):
                low_bit = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit:
                    value ^= polynomial
            entries.append(value)
        return entries
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ch) & 0xff
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[index]

    def __init__(self, pwd):
        """Seed the three keys and mix in every byte of *pwd*."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for byte in pwd:
            self._UpdateKeys(byte)

    def _UpdateKeys(self, c):
        """Advance the three 32-bit keys with the plain byte *c*."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        summed = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (summed * 0x08088405 + 1) & mask32
        self.key2 = self._crc32((self.key1 >> 24) & 0xFF, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        assert isinstance(c, int)
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 0xFF
        plain = c ^ keystream
        self._UpdateKeys(plain)
        return plain
452
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap *fileobj*, positioned at the start of the member's data.

        fileobj -- binary file object to read the raw member bytes from.
        zipinfo -- ZipInfo describing the member (compression type,
                   compressed size and name are read from it).
        decrypt -- optional callable applied to every raw byte.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = b''
        self.readbuffer = b''
        self.linebuffer = b''
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = (b"\n", )
        self.lastdiscard = b''

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed  = False
        self.mode    = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)
        elif self.compress_type == ZIP_BZIP2:
            self.dc = bz2.BZ2Decompressor()
            self.compreadsize = 900000

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal-newline handling for readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = (b"\n", )
        if self.univ_newlines:
            self.nlSeps = (b"\r\n", b"\r", b"\n")

    def __iter__(self):
        return self

    def __next__(self):
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark this object closed; the underlying file is left untouched."""
        self.closed = True

    def _checkfornewline(self):
        """Return (index, length) of a separator in linebuffer, or (-1, -1).

        Separators are tried in the order they appear in nlSeps; the first
        one that occurs anywhere in the buffer wins.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[:1]) == (b'\r', b'\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxsize
        elif size == 0:
            return b''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            nl = min(nl, size)
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = b''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + b"\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to *size* decoded bytes (everything when size is None).

        Raw bytes are pulled from the underlying file, passed through the
        decrypter if one was given, then through the decompressor for
        non-stored members, and accumulated in readbuffer.
        """
        # act like file obj and return empty string if size is 0
        if size == 0:
            return b''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None and size >= 0:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            else:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            data = self.fileobj.read(bytesToRead)
            self.bytes_read += len(data)
            self.rawbuffer += data

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = b''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = bytes(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type != ZIP_STORED:
                    newdata = self.dc.decompress(newdata)
                    if self.compress_type == ZIP_DEFLATED:
                        self.rawbuffer = self.dc.unconsumed_tail
                    else:
                        # Only the zlib object exposes unconsumed_tail.  The
                        # buffer must stay a bytes object, otherwise the next
                        # "rawbuffer += data" fails with TypeError.
                        self.rawbuffer = b''
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        if hasattr(self.dc, 'flush'):
                            newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata


        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            data = self.readbuffer
            self.readbuffer = b''
        else:
            data = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return data
654
655
656 class ZipFile:
657 """ Class with methods to open, read, write, close, list zip files.
658
659 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
660
661 file: Either the path to the file, or a file-like object.
662 If it is a path, the file will be opened and closed by ZipFile.
663 mode: The mode can be either read "r", write "w" or append "a".
664 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
665 or ZIP_BZIP2 (requires bz2).
666 allowZip64: if True ZipFile will create files with ZIP64 extensions when
667 needed, otherwise it will raise an exception when this would
668 be necessary.
669
670 """
671
672 fp = None # Set here since __del__ checks it
673
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file may be a path (opened here) or an already open file-like object.
    RuntimeError is raised for an unknown mode, an unsupported compression
    method, or a method whose supporting module could not be imported.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    # Validate the requested method; ZIP_STORED needs no support module.
    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    key = mode.replace('b', '')[0]
    self.mode = key
    self.compression = compression  # Method of compression
    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.pwd = None
    self.comment = b''

    # Check if we were passed a file-like object
    if not isinstance(file, str):
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        # No, it's a filename
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = io.open(file, modeDict[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending to a file that cannot be opened for update:
            # fall back to creating it.
            mode = key = 'w'
            self.fp = io.open(file, modeDict[mode])

    if key == 'r':
        self._GetContents()
    elif key == 'a':
        try:
            # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:
            # file is not a zip file, just append
            self.fp.seek(0, 2)
    elif key != 'w':
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
737
def _GetContents(self):
    """Load the archive directory via _RealGetContents().

    If that raises BadZipfile and the file was opened by this object
    (rather than passed in), the file is closed and self.fp cleared
    before the exception is re-raised.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        opened_here = not self._filePassed
        if opened_here:
            self.fp.close()
            self.fp = None
        raise
748
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Locates the end-of-central-directory record, then parses every central
    directory entry into a ZipInfo that is appended to self.filelist and
    indexed in self.NameToInfo.  Also sets self.comment and self.start_dir.

    Raises BadZipfile when the end record is missing, the directory offset
    is negative, or an entry has a bad signature or is truncated.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset", offset_cd, inferred, concat)
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    if self.start_dir < 0:
        # Inconsistent sizes in the end record; report it as a bad archive
        # instead of letting the seek below fail with an unrelated error.
        raise BadZipfile("Bad offset for central directory")
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    fp = io.BytesIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # Valid signature but not enough bytes for a whole entry.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[_CD_FLAG_BITS]
        if flags & 0x800:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
            x.flag_bits, x.compress_type, t, d,
            x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total", total)
817
818
def namelist(self):
    """Return a new list holding the file name of every archive member."""
    return [entry.filename for entry in self.filelist]
825
def infolist(self):
    """Return the archive's list of ZipInfo instances.

    This is the internal list object itself, not a copy.
    """
    members = self.filelist
    return members
830
def printdir(self, file=None):
    """Print a table of contents for the zip file.

    One header row is written, then one row per member with its name,
    modification time and uncompressed size.  Output goes to *file*
    (print()'s default when None).
    """
    heading = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(heading, file=file)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row, file=file)
839
def testzip(self):
    """Read all the files and report the first bad one.

    Every member is opened and read to the end.  The name of the first
    member for which that raises BadZipfile is returned; None is returned
    when every member could be read.
    """
    block_len = 1 << 20
    for entry in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            member = self.open(entry.filename, "r")
            data = member.read(block_len)
            while data:
                data = member.read(block_len)
        except BadZipfile:
            return entry.filename
    return None
852
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no member with that name.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
861
def setpassword(self, pwd):
    """Set default password for encrypted files.

    pwd must be a bytes object, or None to clear a previously set default.
    Raises TypeError for any other type; an explicit check is used rather
    than ``assert`` so the validation also runs under ``python -O``.
    """
    if pwd is not None and not isinstance(pwd, bytes):
        raise TypeError("pwd: expected bytes, got %s" % type(pwd).__name__)
    self.pwd = pwd
866
def read(self, name, pwd=None):
    """Return the full contents of member *name* as bytes.

    The member is opened in mode "r" with the given password and read in
    a single call.
    """
    member = self.open(name, "r", pwd)
    return member.read()
870
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name -- member name or ZipInfo object.
    mode -- "r", "U" or "rU"; a "U" turns on universal newlines.
    pwd  -- password (bytes) for encrypted members; falls back to the
            default set with setpassword().

    Raises RuntimeError for a bad mode, a closed archive, a missing or
    wrong password; BadZipfile when the local file header is invalid or
    disagrees with the central directory.  A file opened here is closed
    again if any of these errors occurs.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = io.open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            raise BadZipfile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        # Decode the header's name the way the header itself declares it
        # (bit 0x800 = UTF-8, otherwise the historical cp437), so that
        # non-ASCII legacy names compare equal to the directory entry.
        if fheader[_FH_GENERAL_PURPOSE_FLAG_BITS] & 0x800:
            fname_str = fname.decode("utf-8")
        else:
            fname_str = fname.decode("cp437")

        if fname_str != zinfo.orig_filename:
            raise BadZipfile(
                  'File name in directory %r and header %r differ.'
                  % (zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            crypt_header = zef_file.read(12)
            h = list(map(zd, crypt_header[0:12]))
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if h[11] != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)
    except BaseException:
        # Do not leak the handle we opened ourselves; a caller-supplied
        # file object is left alone.
        if not self._filePassed:
            zef_file.close()
        raise

    # set universal newlines on ZipExtFile if necessary
    if "U" in mode:
        zef.set_univ_newlines(True)
    return zef
948
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a ZipInfo object. You can
       specify a different directory using `path'.

       Returns whatever _extract_member() returns for the member.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
962
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
       directory. `path' specifies a different directory to extract to.
       `members' is optional and must be a subset of the list returned
       by namelist().

       Each selected member is handed to extract() in order.
    """
    selected = self.namelist() if members is None else members
    for item in selected:
        self.extract(item, path, pwd)
974
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

       Member names come from the archive and are untrusted: absolute
       paths, drive prefixes and "." / ".." components are dropped so the
       result always lies inside targetpath.  Returns the path written (or
       the directory created for a directory entry).
    """
    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
        and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Interpret an absolute name as relative: remove the drive letter,
    # redundant separators and any "." or ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    unsafe_parts = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(part for part in arcname.split(os.path.sep)
                               if part not in unsafe_parts)

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # Slicing (rather than indexing) keeps an empty name from raising
    # IndexError here.
    if member.filename[-1:] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    source = self.open(member, pwd=pwd)
    try:
        # Both handles are released even if the copy fails part-way.
        with open(targetpath, "wb") as target:
            shutil.copyfileobj(source, target)
    finally:
        source.close()

    return targetpath
1011
1012 def _writecheck(self, zinfo):
1013 """Check for errors before writing a file to the archive."""
1014 if zinfo.filename in self.NameToInfo:
1015 if self.debug: # Warning for duplicate names
1016 print("Duplicate name:", zinfo.filename)
1017 if self.mode not in ("w", "a"):
1018 raise RuntimeError('write() requires mode "w" or "a"')
1019 if not self.fp:
1020 raise RuntimeError(
1021 "Attempt to write ZIP archive that was already closed")
1022 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1023 raise RuntimeError(
1024 "Compression requires the (missing) zlib module")
1025 if zinfo.compress_type == ZIP_BZIP2 and not bz2:
1026 raise RuntimeError(
1027 "Compression requires the (missing) bz2 module")
1028 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2):
1029 raise RuntimeError("That compression method is not supported")
1030 if zinfo.file_size > ZIP64_LIMIT:
1031 if not self._allowZip64:
1032 raise LargeZipFile("Filesize would require ZIP64 extensions")
1033 if zinfo.header_offset > ZIP64_LIMIT:
1034 if not self._allowZip64:
1035 raise LargeZipFile(
1036 "Zipfile size would require ZIP64 extensions")
1037
def write(self, filename, arcname=None, compress_type=None):
    """Add the file or directory `filename' to the archive as `arcname'.

    `arcname' defaults to `filename'; in either case the drive prefix and
    leading separators are removed and the path is normalized.  A
    directory is stored as an empty entry whose name ends in "/".
    `compress_type' overrides the archive's default compression for this
    entry.  Raises RuntimeError when the archive has already been closed;
    further checks are performed by _writecheck().
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    file_stat = os.stat(filename)
    is_directory = stat.S_ISDIR(file_stat.st_mode)
    date_time = time.localtime(file_stat.st_mtime)[0:6]

    # Work out the name the entry gets inside the archive.
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if is_directory:
        arcname += '/'

    # Create ZipInfo instance to store file information
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (file_stat.st_mode & 0xFFFF) << 16  # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = file_stat.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if is_directory:
        # Directories carry no data: register the entry, emit its header.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    running_crc = 0
    raw_total = 0
    packed_total = 0
    with open(filename, "rb") as src:
        # The header is written with zeroed CRC and sizes; the real values
        # are patched in once the data has been streamed.
        zinfo.CRC = zinfo.compress_size = zinfo.file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                          zlib.DEFLATED, -15)
        elif zinfo.compress_type == ZIP_BZIP2:
            compressor = bz2.BZ2Compressor()
        else:
            compressor = None
        while True:
            chunk = src.read(1024 * 8)
            if not chunk:
                break
            raw_total += len(chunk)
            running_crc = crc32(chunk, running_crc) & 0xffffffff
            if compressor:
                chunk = compressor.compress(chunk)
                packed_total += len(chunk)
            self.fp.write(chunk)

    if compressor:
        tail = compressor.flush()
        packed_total += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_total
    else:
        zinfo.compress_size = raw_total
    zinfo.CRC = running_crc
    zinfo.file_size = raw_total

    # Seek backwards and write CRC and file sizes into the header written
    # above, then return to the end of the entry's data.
    resume_at = self.fp.tell()
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(resume_at, 0)

    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1120
def writestr(self, zinfo_or_arcname, data, compress_type=None):
    """Write a file into the archive.  The contents is 'data', which
    may be either a 'str' or a 'bytes' instance; if it is a 'str',
    it is encoded as UTF-8 first.
    'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    'compress_type', when given, overrides the compression method for
    this entry (the archive default for a name, or the method recorded
    in a passed ZipInfo), mirroring the parameter of write().

    Raises RuntimeError when the archive has already been closed; further
    checks are performed by _writecheck().
    """
    # Fail early on a closed archive, as write() does.
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if isinstance(data, str):
        data = data.encode("utf-8")
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16
    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(data)             # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header data
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(data) & 0xffffffff    # CRC-32 checksum

    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
    elif zinfo.compress_type == ZIP_BZIP2:
        co = bz2.BZ2Compressor()
    else:
        co = None
    if co is not None:
        data = co.compress(data) + co.flush()
    # For stored entries 'data' is untouched, so this equals file_size.
    zinfo.compress_size = len(data)         # Compressed size

    self.fp.write(zinfo.FileHeader())
    self.fp.write(data)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.fp.flush()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1167
def __del__(self):
    """Call the "close()" method in case the user forgot.

    All of the work is delegated to close().
    """
    self.close()
1171
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing when the archive is already closed.  When the archive
    was opened for writing and has been modified, the central directory
    and the end-of-archive record (preceded by the ZIP64 records when the
    entry count, directory size or directory offset requires them) are
    written first.  An over-long archive comment is truncated to
    ZIP_MAX_COMMENT bytes.

    The underlying file is always released, even if writing the ending
    records fails; it is only actually closed when this object opened it
    (self._filePassed is false).
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)

                # Values too large for the 32-bit fields are moved into a
                # ZIP64 extra field and replaced by 0xffffffff markers.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Debugging aid: dump the values that failed to pack.
                    print((structCentralDir, stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset), file=sys.stderr)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = len(self.filelist)
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic record only holds clamped values.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    print('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Mark the archive closed first so that a failing close() of the
        # underlying file cannot leave a half-closed object behind.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1280
1281
class PyZipFile(ZipFile):
    """ZipFile variant for archiving compiled Python modules and packages."""

    def writepy(self, pathname, basename=""):
        """Add the Python code found at "pathname" to the ZIP archive.

        Three cases are handled:

        * a package directory (one containing __init__.py): the package's
          modules and, recursively, its sub-packages are added below
          "basename/<directory name>";
        * a plain directory: every *.py file directly inside it is added
          below "basename";
        * anything else must be a path ending in ".py" and is added as a
          single module, otherwise RuntimeError is raised.

        What is stored is always the compiled module (module.pyo or
        module.pyc); module.py is compiled first when necessary.
        """
        package_name = os.path.split(pathname)[1]

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            fname, arcname = self._get_codename(pathname[0:-3], basename)
            if self.debug:
                print("Adding file", arcname)
            self.write(fname, arcname)
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory", pathname)
            for entry in os.listdir(pathname):
                if os.path.splitext(entry)[1] != ".py":
                    continue
                module_path = os.path.join(pathname, entry)
                fname, arcname = self._get_codename(module_path[0:-3],
                                                    basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)
            return

        # This is a package directory: nest it below the current basename.
        if basename:
            basename = "%s/%s" % (basename, package_name)
        else:
            basename = package_name
        if self.debug:
            print("Adding package in", pathname, "as", basename)
        fname, arcname = self._get_codename(initname[0:-3], basename)
        if self.debug:
            print("Adding", arcname)
        self.write(fname, arcname)

        # Add all remaining *.py files and package subdirectories
        entries = os.listdir(pathname)
        entries.remove("__init__.py")
        for entry in entries:
            entry_path = os.path.join(pathname, entry)
            if os.path.isdir(entry_path):
                if os.path.isfile(os.path.join(entry_path, "__init__.py")):
                    # Sub-package: recurse with the extended basename.
                    self.writepy(entry_path, basename)
            elif os.path.splitext(entry)[1] == ".py":
                fname, arcname = self._get_codename(entry_path[0:-3],
                                                    basename)
                if self.debug:
                    print("Adding", arcname)
                self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the module path.

        "pathname" is a module path without extension.  An up-to-date
        .pyo file is preferred; otherwise the .pyc file is used, after
        compiling the .py source when the .pyc is missing or older than
        the source.  For example, given /python/lib/string, return
        (/python/lib/string.pyc, string.pyc).  The archive name is
        prefixed with "basename/" when "basename" is not empty.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"

        fname = file_pyc
        if (os.path.isfile(file_pyo)
                and os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime):
            fname = file_pyo    # Use .pyo file
        elif (not os.path.isfile(file_pyc)
                or os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime):
            import py_compile
            if self.debug:
                print("Compiling", file_py)
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                # A compile error is reported, not propagated.
                print(err.msg)

        archivename = os.path.basename(fname)
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
1379
1380
def main(args=None):
    """Command-line interface: list, test, extract or create a ZIP file.

    `args' is the argument list without the program name and defaults to
    sys.argv[1:].  The first argument selects the action (-l, -t, -e or
    -c).  On a usage error the usage text is printed and the process
    exits with status 1.  The archive is closed again even when the
    selected action fails.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # NOTE(review): relies on testzip() returning the name of the
            # first corrupt member, or None, as in the standard zipfile
            # module -- confirm for this fork.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            # extractall() creates parent directories and handles directory
            # entries, which a plain open(..., 'wb') per member cannot.
            zf.extractall(args[2])
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            """Add `path' (recursing into directories) as `zippath'."""
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                zippath = os.path.basename(src)
                if not zippath:
                    # "dir/" has an empty basename; use the directory name.
                    zippath = os.path.basename(os.path.dirname(src))
                if zippath in ('', os.curdir, os.pardir):
                    zippath = ''
                addToZip(zf, src, zippath)
        finally:
            zf.close()


if __name__ == "__main__":
    main()