comparison zipfile27.py @ 49:245150080c48 1.20 1.20.3

Converted 1.20 into a branch
author Oleg Oshmyan <chortos@inbox.lv>
date Sun, 19 Dec 2010 23:23:24 +0200
parents 2.00/zipfile27.py@faf0254fe279
children
comparison
equal deleted inserted replaced
47:06f1683c8db9 49:245150080c48
1 """
2 Read and write ZIP files.
3 """
4 # Improved by Chortos-2 in 2010 (added bzip2 support)
5 import struct, os, time, sys, shutil
6 import binascii, cStringIO, stat
7 import io
8 import re
9
10 try:
11 import zlib # We may need its compression method
12 crc32 = zlib.crc32
13 except ImportError:
14 zlib = None
15 crc32 = binascii.crc32
16
17 try:
18 import bz2 # We may need its compression method
19 except ImportError:
20 bz2 = None
21
22 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
23 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", "ZIP_BZIP2" ]
24
class BadZipfile(Exception):
    """Raised by this module for archives it cannot read.

    Examples visible in this file are bad magic numbers, CRC mismatches
    and archives that span multiple disks.
    """


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


# Alias for BadZipfile: the exception raised by this module.
error = BadZipfile
36
# Size limits used by this module.
ZIP64_LIMIT = (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 1 << 16
ZIP_MAX_COMMENT = (1 << 16) - 1

# constants for Zip file compression methods
# (other ZIP compression methods are not supported)
ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2 = 0, 8, 12

# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields in an unpacked end-of-central-directory record.
# The last two indices are not part of the structure as defined in the
# spec, but they are used internally by this module as a convenience.
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)

# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)

# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
139
def _check_zipfile(fp):
    """Return True if _EndRecData() finds an end-of-archive record in *fp*.

    An IOError raised while probing the file counts as "not a ZIP file".
    """
    try:
        found = bool(_EndRecData(fp))   # truthy: file has correct magic number
    except IOError:
        return False
    return found
147
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too.
    Any IOError (for example a path that cannot be opened) yields False.
    """
    try:
        if not hasattr(filename, "read"):
            # A path: open it ourselves and let "with" close it again.
            with open(filename, "rb") as fp:
                return _check_zipfile(fp)
        return _check_zipfile(fp=filename)
    except IOError:
        return False
163
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is a seekable file object, offset is the (negative) position of the
    classic end-of-central-directory record relative to the end of the file,
    and endrec is the list built from that classic record.  endrec is
    returned unchanged when no ZIP64 locator/record pair is found; otherwise
    its first seven fields are overwritten in place with the ZIP64 values.

    Raises BadZipfile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        # A short read cannot hold a locator; unpacking it would raise
        # struct.error instead of reporting "no ZIP64 data".
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        # Same guard as above for the ZIP64 record itself.
        return endrec
    (sig, sz, create_version, read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
201
202
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.

    None is returned when the file is too small, when no record signature is
    found, when the record found is truncated, or when the comment length
    stored in the record does not match the bytes that follow it."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    # The length test keeps struct.unpack from seeing a buffer of the wrong
    # size when the file object did not honour the seek exactly.
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to the end of the file for a
            # whole record to follow it: the archive is corrupt.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start+sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
259
260
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named *filename* stamped with *date_time*.

        date_time is a (year, month, day, hour, min, sec) tuple.  The name
        is cut at the first null byte and os.sep is replaced by "/".
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra record is
        appended, the 32-bit size fields are set to 0xffffffff, and
        extract_version / create_version are raised to at least 45 on this
        object as a side effect.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own value; deriving it from
            # extract_version could lower a create_version set higher.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        Unicode names are written as ASCII when possible; otherwise as UTF-8
        with flag bit 0x800 set.  Byte-string names are passed through.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply ZIP64 sizes/offset found in the extra field to this entry.

        Walks the (type, length, payload) records in self.extra.  For a
        record of type 1, each of file_size, compress_size and header_offset
        that currently holds its 0xffffffff placeholder is replaced, in that
        order, by the next 64-bit value of the record.

        Raises RuntimeError when a type 1 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # A record header is 4 bytes; anything shorter left over is padding
        # or garbage and cannot be unpacked.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
408
409
410 class _ZipDecrypter:
411 """Class to handle decryption of files stored within a ZIP archive.
412
413 ZIP supports a password-based form of encryption. Even though known
414 plaintext attacks have been found against it, it is still useful
415 to be able to get data out of such a file.
416
417 Usage:
418 zd = _ZipDecrypter(mypwd)
419 plain_char = zd(cypher_char)
420 plain_text = map(zd, cypher_text)
421 """
422
423 def _GenerateCRCTable():
424 """Generate a CRC-32 table.
425
426 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
427 internal keys. We noticed that a direct implementation is faster than
428 relying on binascii.crc32().
429 """
430 poly = 0xedb88320
431 table = [0] * 256
432 for i in range(256):
433 crc = i
434 for j in range(8):
435 if crc & 1:
436 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
437 else:
438 crc = ((crc >> 1) & 0x7FFFFFFF)
439 table[i] = crc
440 return table
441 crctable = _GenerateCRCTable()
442
443 def _crc32(self, ch, crc):
444 """Compute the CRC32 primitive on one byte."""
445 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
446
447 def __init__(self, pwd):
448 self.key0 = 305419896
449 self.key1 = 591751049
450 self.key2 = 878082192
451 for p in pwd:
452 self._UpdateKeys(p)
453
454 def _UpdateKeys(self, c):
455 self.key0 = self._crc32(c, self.key0)
456 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
457 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
458 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
459
460 def __call__(self, c):
461 """Decrypt a single character."""
462 c = ord(c)
463 k = self.key2 | 2
464 c = c ^ (((k * (k^1)) >> 8) & 255)
465 c = chr(c)
466 self._UpdateKeys(c)
467 return c
468
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses matter: "-" binds
    # tighter than "<<", so "1 << 31 - 1" would evaluate to 1 << 30.
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None):
        """Wrap *fileobj*, positioned at the start of a member's data.

        mode is the mode passed to ZipFile.open() ('U' enables universal
        newlines), zipinfo describes the member, and decrypter, if given, is
        a callable applied to every byte read from fileobj.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        elif self._compress_type == ZIP_BZIP2:
            self._decompressor = bz2.BZ2Decompressor()
            # Per-instance override of the read size for bzip2 members
            # (presumably chosen to match bzip2's largest block size).
            self.MIN_READ_SIZE = 900000
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            chunk = self.read(n)
            if len(chunk) > self._offset:
                # read() needed several read1() calls and the earlier part of
                # the chunk has already been dropped from the buffer.  Put
                # the chunk back in front; rewinding _offset alone would
                # make it negative.
                self._readbuffer = chunk + self._readbuffer[self._offset:]
                self._offset = 0
            else:
                self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Return True: the object is always readable."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold *newdata* into the running CRC; verify it when *eof* is true.

        Raises BadZipfile if the final CRC differs from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0
        elif (len(self._unconsumed) > 0 and n > len_readbuffer and
              self._compress_type == ZIP_BZIP2):
            data = self._decompressor.decompress(self._unconsumed)

            self._unconsumed = ''
            # Everything read so far has been handed to the decompressor, so
            # the member is complete once nothing is left in the file.
            # Without this call bzip2 members were never CRC-checked.
            self._update_crc(data, eof=(self._compress_left == 0))
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data
666
667
668
669 class ZipFile:
670 """ Class with methods to open, read, write, close, list zip files.
671
672 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
673
674 file: Either the path to the file, or a file-like object.
675 If it is a path, the file will be opened and closed by ZipFile.
676 mode: The mode can be either read "r", write "w" or append "a".
677 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
678 or ZIP_BZIP2 (requires bz2).
679 allowZip64: if True ZipFile will create files with ZIP64 extensions when
680 needed, otherwise it will raise an exception when this would
681 be necessary.
682
683 """
684
685 fp = None # Set here since __del__ checks it
686
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file is a path or a file-like object.  compression must be ZIP_STORED,
    ZIP_DEFLATED or ZIP_BZIP2; RuntimeError is raised for any other value,
    for a missing zlib/bz2 module, and for an invalid mode.  In "r" mode a
    file that is not a ZIP archive raises BadZipfile.  A file opened here
    (path given) is closed again if reading the directory fails.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                  "Compression requires the (missing) bz2 module")
    else:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self.comment = ''

    # Check if we were passed a file-like object
    if isinstance(file, basestring):
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode == 'a':
                # Nothing to append to yet: create the file instead.
                mode = key = 'w'
                self.fp = open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    try:
        if key == 'r':
            self._GetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

                # set the modified flag so central directory gets written
                # even if no files are added to the archive
                self._didModify = True
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except:
        # Whatever went wrong, do not leak a file this constructor opened.
        # _GetContents() may already have closed it and cleared self.fp.
        if not self._filePassed and self.fp is not None:
            self.fp.close()
            self.fp = None
        raise
757
758 def __enter__(self):
759 return self
760
761 def __exit__(self, type, value, traceback):
762 self.close()
763
764 def _GetContents(self):
765 """Read the directory, making sure we close the file if the format
766 is bad."""
767 try:
768 self._RealGetContents()
769 except BadZipfile:
770 if not self._filePassed:
771 self.fp.close()
772 self.fp = None
773 raise
774
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.comment, self.start_dir, self.filelist and self.NameToInfo
    from the central directory.  Raises BadZipfile when no end-of-archive
    record is found or a central directory entry is malformed or truncated.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipfile("File is not a zip file")
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset %s %s %s"
              % (offset_cd, inferred, concat))
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory rather than from the archive file.
    fp = cStringIO.StringIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # Right signature but not enough bytes for a whole entry;
            # unpacking it would raise struct.error instead of BadZipfile.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
            x.flag_bits, x.compress_type, t, d,
            x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        x.filename = x._decodeFilename()
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total %s" % (total,))
840
841
def namelist(self):
    """Return a new list with the file name of every entry in self.filelist."""
    return [entry.filename for entry in self.filelist]
848
def infolist(self):
    """Return the list of ZipInfo instances for files in the archive.

    This is self.filelist itself, not a copy.
    """
    entries = self.filelist
    return entries
853
def printdir(self):
    """Print a table of contents for the zip file."""
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row)
860
861 def testzip(self):
862 """Read all the files and check the CRC."""
863 chunk_size = 2 ** 20
864 for zinfo in self.filelist:
865 try:
866 # Read by chunks, to avoid an OverflowError or a
867 # MemoryError with very large embedded files.
868 f = self.open(zinfo.filename, "r")
869 while f.read(chunk_size): # Check CRC-32
870 pass
871 except BadZipfile:
872 return zinfo.filename
873
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no such member.
    """
    info = self.NameToInfo.get(name)
    if info is not None:
        return info
    raise KeyError('There is no item named %r in the archive' % name)
882
def setpassword(self, pwd):
    """Set default password for encrypted files.

    The value is stored in self.pwd.
    """
    self.pwd = pwd
    return None
886
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    The member is opened in mode "r" with the given password and read to
    the end.
    """
    member = self.open(name, "r", pwd)
    return member.read()
890
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name is a member name or a ZipInfo object, mode is "r", "U" or "rU",
    and pwd overrides the archive's default password for encrypted members.

    Raises RuntimeError for a bad mode, a closed archive, a missing
    password or a wrong password, and BadZipfile when the local file header
    is malformed or disagrees with the central directory.  A file opened
    here for the read is closed again if any of those errors occurs.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
        opened_here = False
    else:
        zef_file = open(self.filename, 'rb')
        opened_here = True

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                  'File name in directory "%s" and header "%s" differ.' % (
                      zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                      "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            enc_header = zef_file.read(12)
            h = [zd(c) for c in enc_header[0:12]]
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd)
    except:
        # The handle we opened above would otherwise stay open until it is
        # garbage-collected; a caller-supplied file object is left alone.
        if opened_here:
            zef_file.close()
        raise
959
def extract(self, member, path=None, pwd=None):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a ZipInfo object. You can
       specify a different directory using `path'.

       Returns whatever _extract_member() returns.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
973
def extractall(self, path=None, members=None, pwd=None):
    """Extract all members from the archive to the current working
       directory. `path' specifies a different directory to extract to.
       `members' is optional and must be a subset of the list returned
       by namelist().
    """
    targets = self.namelist() if members is None else members
    for entry in targets:
        self.extract(entry, path, pwd)
985
986 def _extract_member(self, member, targetpath, pwd):
987 """Extract the ZipInfo object 'member' to a physical
988 file on the path targetpath.
989 """
990 # build the destination pathname, replacing
991 # forward slashes to platform specific separators.
992 # Strip trailing path separator, unless it represents the root.
993 if (targetpath[-1:] in (os.path.sep, os.path.altsep)
994 and len(os.path.splitdrive(targetpath)[1]) > 1):
995 targetpath = targetpath[:-1]
996
997 # don't include leading "/" from file name if present
998 if member.filename[0] == '/':
999 targetpath = os.path.join(targetpath, member.filename[1:])
1000 else:
1001 targetpath = os.path.join(targetpath, member.filename)
1002
1003 targetpath = os.path.normpath(targetpath)
1004
1005 # Create all upper directories if necessary.
1006 upperdirs = os.path.dirname(targetpath)
1007 if upperdirs and not os.path.exists(upperdirs):
1008 os.makedirs(upperdirs)
1009
1010 if member.filename[-1] == '/':
1011 if not os.path.isdir(targetpath):
1012 os.mkdir(targetpath)
1013 return targetpath
1014
1015 source = self.open(member, pwd=pwd)
1016 target = file(targetpath, "wb")
1017 shutil.copyfileobj(source, target)
1018 source.close()
1019 target.close()
1020
1021 return targetpath
1022
1023 def _writecheck(self, zinfo):
1024 """Check for errors before writing a file to the archive."""
1025 if zinfo.filename in self.NameToInfo:
1026 if self.debug: # Warning for duplicate names
1027 print "Duplicate name:", zinfo.filename
1028 if self.mode not in ("w", "a"):
1029 raise RuntimeError, 'write() requires mode "w" or "a"'
1030 if not self.fp:
1031 raise RuntimeError, \
1032 "Attempt to write ZIP archive that was already closed"
1033 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1034 raise RuntimeError, \
1035 "Compression requires the (missing) zlib module"
1036 if zinfo.compress_type == ZIP_BZIP2 and not bz2:
1037 raise RuntimeError, \
1038 "Compression requires the (missing) bz2 module"
1039 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2):
1040 raise RuntimeError, \
1041 "That compression method is not supported"
1042 if zinfo.file_size > ZIP64_LIMIT:
1043 if not self._allowZip64:
1044 raise LargeZipFile("Filesize would require ZIP64 extensions")
1045 if zinfo.header_offset > ZIP64_LIMIT:
1046 if not self._allowZip64:
1047 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
1048
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    `arcname' defaults to `filename'; its drive letter and leading
    path separators are removed, and a trailing "/" is appended for a
    directory.  `compress_type' overrides the archive's default
    compression for this member.  Raises RuntimeError if the archive
    has already been closed; _writecheck() may raise further errors.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    file_stat = os.stat(filename)
    isdir = stat.S_ISDIR(file_stat.st_mode)
    date_time = time.localtime(file_stat.st_mtime)[0:6]

    # Work out the member name stored in the archive.
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'

    # Create ZipInfo instance to store file information
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (file_stat[0] & 0xFFFF) << 16    # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = file_stat.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # A directory entry is a header with no data behind it.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    with open(filename, "rb") as src:
        # The header goes out with zeroed CRC and sizes; the real
        # values are patched in once the data has been written.
        zinfo.CRC = crc = 0
        zinfo.compress_size = packed_size = 0
        zinfo.file_size = raw_size = 0
        self.fp.write(zinfo.FileHeader())

        compressor = None
        if zinfo.compress_type == ZIP_DEFLATED:
            compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                          zlib.DEFLATED, -15)
        elif zinfo.compress_type == ZIP_BZIP2:
            compressor = bz2.BZ2Compressor()

        while True:
            chunk = src.read(1024 * 8)
            if not chunk:
                break
            raw_size += len(chunk)
            crc = crc32(chunk, crc) & 0xffffffff
            if compressor:
                chunk = compressor.compress(chunk)
                packed_size += len(chunk)
            self.fp.write(chunk)

    if compressor:
        tail = compressor.flush()
        packed_size += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_size
    else:
        zinfo.compress_size = raw_size
    zinfo.CRC = crc
    zinfo.file_size = raw_size

    # Seek backwards and write CRC and file sizes
    resume_at = self.fp.tell()      # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(resume_at, 0)

    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1131
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Write a file into the archive.  The contents is the string
    'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
    the name of the file in the archive.

    When a name is given, a new ZipInfo is created with the current
    local time, the archive's default compression and permission bits
    0600.  A ZipInfo that is passed in is updated in place.
    'compress_type', when not None, overrides the compression method
    of the entry.  Raises RuntimeError if the archive has already been
    closed; _writecheck() may raise further errors.
    """
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(bytes)            # Uncompressed size
    # Nothing is written before the header below, so this offset is
    # computed once and stays valid.
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(bytes) & 0xffffffff   # CRC-32 checksum

    if zinfo.compress_type == ZIP_DEFLATED:
        co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                              zlib.DEFLATED, -15)
    elif zinfo.compress_type == ZIP_BZIP2:
        co = bz2.BZ2Compressor()
    else:
        co = None
    if co is not None:
        bytes = co.compress(bytes) + co.flush()
    # For stored members this equals the uncompressed size.
    zinfo.compress_size = len(bytes)        # Compressed size

    self.fp.write(zinfo.FileHeader())
    self.fp.write(bytes)
    self.fp.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                                  zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1178
1179 def __del__(self):
1180 """Call the "close()" method in case the user forgot."""
1181 self.close()
1182
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    When the archive is in mode "w" or "a" and has been modified, the
    central directory is written, followed by the ZIP64 end-of-archive
    records (only when the entry count, directory size or directory
    offset requires them) and the end-of-archive record with the
    archive comment.  A comment longer than ZIP_MAX_COMMENT is
    truncated, with a message printed when self.debug > 0.

    self.fp is always reset to None, and the file is closed unless
    self._filePassed is set, even if writing the records raises.
    Calling close() on an archive that is already closed does nothing.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            # write ending records
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q'*len(extra),
                        1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values that could not be packed, then
                    # let the error propagate.
                    sys.stderr.write("%s\n" % ((structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data),
                        len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset),))
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = len(self.filelist)
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length; a comment of exactly
            # ZIP_MAX_COMMENT bytes still fits and is left alone
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    print('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Release the file even when writing the records failed, so a
        # later close() (for instance from __del__) does not retry.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1292
1293
class PyZipFile(ZipFile):
    """ZipFile subclass for building archives of Python library files and
    packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.
        """
        parent, name = os.path.split(pathname)

        if not os.path.isdir(pathname):
            # A single module file.
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            self._write_module(pathname[0:-3], basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if not os.path.isfile(initname):
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("%s %s" % ("Adding files from directory", pathname))
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                root, ext = os.path.splitext(filename)
                if ext == ".py":
                    self._write_module(path[0:-3], basename, "Adding")
            return

        # This is a package directory, add it
        if basename:
            basename = "%s/%s" % (basename, name)
        else:
            basename = name
        if self.debug:
            print("%s %s %s %s" % ("Adding package in", pathname,
                                   "as", basename))
        self._write_module(initname[0:-3], basename, "Adding")

        dirlist = os.listdir(pathname)
        dirlist.remove("__init__.py")
        # Add all *.py files and package subdirectories
        for filename in dirlist:
            path = os.path.join(pathname, filename)
            root, ext = os.path.splitext(filename)
            if os.path.isdir(path):
                if os.path.isfile(os.path.join(path, "__init__.py")):
                    # This is a package directory, add it
                    self.writepy(path, basename)  # Recursive call
            elif ext == ".py":
                self._write_module(path[0:-3], basename, "Adding")

    def _write_module(self, modpath, basename, label):
        """Resolve the compiled file for `modpath' and write it to the
        archive, printing `label' and the archive name when debugging."""
        fname, arcname = self._get_codename(modpath, basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"

        def mtime(path):
            return os.stat(path).st_mtime

        if os.path.isfile(file_pyo) and mtime(file_pyo) >= mtime(file_py):
            fname = file_pyo    # Use .pyo file
        else:
            fname = file_pyc
            # Compile only when the .pyc is missing or older than the
            # source.
            if not os.path.isfile(file_pyc) or \
                    mtime(file_pyc) < mtime(file_py):
                import py_compile
                if self.debug:
                    print("%s %s" % ("Compiling", file_py))
                try:
                    py_compile.compile(file_py, file_pyc, None, True)
                except py_compile.PyCompileError as err:
                    print(err.msg)

        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
1391
1392
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    `args' is the list of command-line arguments without the program
    name; it defaults to sys.argv[1:].  The first argument selects the
    action (-l, -t, -e or -c).  Prints the usage text and exits with
    status 1 when the action is unknown or the number of arguments
    does not match.  The archive is closed even when the action fails.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage():
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage()

    if args[0] == '-l':
        if len(args) != 2:
            usage()
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            usage()
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            usage()
        zf = ZipFile(args[1], 'r')
        try:
            # extractall() creates parent directories and handles
            # directory entries, which a plain open()/write() cannot.
            zf.extractall(args[2])
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            usage()

        def addToZip(zf, path, zippath):
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1466
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()