comparison zipfile.py @ 0:b9622c1e2197

Created the repository
author Oleg Oshmyan <chortos@inbox.lv>
date Fri, 22 Jan 2010 18:20:10 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b9622c1e2197
1 """
2 Read and write ZIP files.
3 """
4 # Improved by Chortos-2 in 2009 (added bzip2 support)
5 import struct, os, time, sys, shutil
6 import binascii, cStringIO, stat
7
8 try:
9 import zlib # We may need its compression method
10 crc32 = zlib.crc32
11 except ImportError:
12 zlib = None
13 crc32 = binascii.crc32
14
15 try:
16 import bz2 # We may need its compression method
17 except ImportError:
18 bz2 = None
19
# Public names exported by ``from zipfile import *``.  ZIP_BZIP2 is listed
# next to the other compression-method constants so that callers can select
# the bzip2 method this module supports.
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "ZIP_BZIP2",
           "is_zipfile", "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile"]
22
class BadZipfile(Exception):
    """Raised when a file cannot be interpreted as a ZIP archive."""


class LargeZipFile(Exception):
    """
    Raised while writing a zipfile that would need the ZIP64 extensions
    when those extensions are disabled.
    """


# The exception raised by this module
error = BadZipfile
34
# Ceilings above which the ZIP64 extensions (or a shorter comment) are needed
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0x10000       # 1 << 16
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# Compression method identifiers understood by this module; every other
# ZIP compression method is unsupported.
ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2 = 0, 8, 12
44
# struct formats, magic numbers, record sizes and field indices for the
# headers/records of a ZIP file.  Names follow the PKWARE description of
# the format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
#     (URL valid as of January 2008)

# "End of central directory" record (section V.I in the format document)
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE) = range(8)
# The next two slots are not part of the on-disk structure; this module
# appends them to the unpacked record as a convenience.
_ECD_COMMENT, _ECD_LOCATION = 8, 9

# "Central directory" file header (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# Positions of the fields in an unpacked central directory header
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# "Local file header" (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)

# "Zip64 end of central directory locator"
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# "Zip64 end of central directory" record (section V.G in the format
# document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
137
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to examine.  Returns True when an
    "end of central directory" record can be located, and False when none
    is found or when opening/reading the file raises IOError.
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even when probing the file raises.
            fpin.close()
    except IOError:
        return False
    return bool(endrec)     # truthy means the magic number was found
149
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- seekable file object holding the archive
    offset -- position of the classic "end of central directory" record,
              relative to the end of the file (callers pass a negative
              value)
    endrec -- list produced by _EndRecData(); modified in place when a
              ZIP64 record is present

    Returns endrec, untouched when the archive has no (complete) ZIP64
    records.  Raises BadZipfile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # The file is too short to hold a ZIP64 locator in front of the
        # classic record (e.g. an empty archive): nothing to merge.
        return endrec
    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    except IOError:
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
181
182
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin is a seekable file object.  The result is a list holding the
    unpacked fields of the ZIP "End of central dir" record, followed by the
    archive comment and by the file offset at which the record starts
    (see the _ECD_* indices).  None is returned when no valid record can be
    found, including when the file is too short to contain one."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        # Shorter than the smallest possible record: not a ZIP file.
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start+sizeEndCentDir]
        # A signature too close to the end of the file cannot be followed
        # by a complete record.
        if len(recData) == sizeEndCentDir:
            endrec = list(struct.unpack(structEndArchive, recData))
            comment = data[start+sizeEndCentDir:]
            # check that comment length is correct
            if endrec[_ECD_COMMENT_SIZE] == len(comment):
                # Append the archive comment and start offset
                endrec.append(comment)
                endrec.append(maxCommentStart + start)

                # Try to read the "Zip64 end of central directory" structure
                return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                     endrec)

    # Unable to find a valid end of central directory structure
    return None
236
237
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Describe an archive member.

        filename  -- name of the member; it is truncated at the first null
                     byte and os.sep is converted to "/"
        date_time -- (year, month, day, hour, min, sec) tuple
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed local file header followed by the encoded
        file name and the extra field.  When a size exceeds ZIP64_LIMIT a
        ZIP64 extra record is appended and extract_version/create_version
        are raised to at least 45 as a side effect.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded file name, flag bits) for writing a header.

        Unicode names are encoded as ASCII when possible; otherwise as
        UTF-8 with flag bit 0x800 set.  Byte-string names pass through.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the file name, decoded from UTF-8 if flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply ZIP64 records found in the extra field.

        Walks the (type, length, payload) records in self.extra.  For a
        record of type 1, sizes/offsets holding the "see ZIP64" marker
        value are replaced by the 64-bit values from the record.  Raises
        RuntimeError when such a record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # A record needs at least its 4-byte header; stray trailing bytes
        # are ignored instead of crashing the unpack below.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx+=1

            extra = extra[ln+4:]
385
386
class _ZipDecrypter:
    """Decrypts members protected with the password-based ZIP encryption.

    Known plaintext attacks exist against this scheme, but being able to
    read such members is still useful.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Build the 256-entry CRC-32 lookup table.

        The encryption scrambles its internal keys with the one-byte CRC32
        primitive; a table-driven implementation was found to be faster
        than going through binascii.crc32().
        """
        polynomial = 0xedb88320
        entries = []
        for seed in range(256):
            register = seed
            for _ in range(8):
                low_bit = register & 1
                register = (register >> 1) & 0x7FFFFFFF
                if low_bit:
                    register ^= polynomial
            entries.append(register)
        return entries
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ord(ch)) & 0xff
        shifted = (crc >> 8) & 0xffffff
        return shifted ^ self.crctable[index]

    def __init__(self, pwd):
        # Fixed starting keys, then mixed with every password character
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for char in pwd:
            self._UpdateKeys(char)

    def _UpdateKeys(self, c):
        """Advance the three keys with the plaintext character c."""
        mask32 = 0xFFFFFFFF
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 0xFF)) & mask32
        self.key1 = (key1 * 0x08088405 + 1) & mask32
        top_byte = chr((self.key1 >> 24) & 0xFF)
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 0xFF
        plain = chr(ord(c) ^ keystream)
        self._UpdateKeys(plain)
        return plain
445
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap fileobj, positioned at the start of the member's data.

        fileobj -- file object to read the raw member bytes from
        zipinfo -- object providing compress_type, compress_size, filename
        decrypt -- optional callable that decrypts one character at a time

        Raises RuntimeError for an unsupported compression method.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = ''
        self.readbuffer = ''
        self.linebuffer = ''
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)
        elif self.compress_type == ZIP_BZIP2:
            self.dc = bz2.BZ2Decompressor()
            self.compreadsize = 900000
        elif self.compress_type != ZIP_STORED:
            raise RuntimeError("The compression method of this file is not supported")

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal newline handling in readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark the object as closed (the underlying file is not touched)."""
        self.closed = True

    def _checkfornewline(self):
        """Return (index, length) of a separator in linebuffer, or (-1, -1)."""
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.
        """
        if size < 0:
            size = sys.maxint
        elif size == 0:
            return ''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            if size < nl:
                # The line is longer than requested: hand out the first
                # `size` characters and keep the remainder (newline
                # included) buffered, so that no data is dropped.
                piece = self.linebuffer[:size]
                self.linebuffer = self.linebuffer[size:]
                self.lastdiscard = ''
                return piece
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = ''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to size bytes of member data as a string.

        With size None or negative, everything that is left is returned.
        An empty string is returned for size 0 and at end of data.
        """
        # a negative size means "no limit", as it does for file objects
        if size is not None and size < 0:
            size = None

        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            else:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            chunk = self.fileobj.read(bytesToRead)
            self.bytes_read += len(chunk)
            self.rawbuffer += chunk

            # handle contents of raw buffer
            if self.rawbuffer:
                newdata = self.rawbuffer
                self.rawbuffer = ''

                # decrypt new data if we were given an object to handle that
                if newdata and self.decrypter is not None:
                    newdata = ''.join(map(self.decrypter, newdata))

                # decompress newly read data if necessary
                if newdata and self.compress_type != ZIP_STORED:
                    newdata = self.dc.decompress(newdata)
                    # only the zlib decompressor is asked for unconsumed_tail
                    self.rawbuffer = self.dc.unconsumed_tail if self.compress_type == ZIP_DEFLATED else ''
                    if self.eof and len(self.rawbuffer) == 0:
                        # we're out of raw bytes (both from the file and
                        # the local buffer); flush just to make sure the
                        # decompressor is done
                        newdata += self.dc.flush()
                        # prevent decompressor from being used again
                        self.dc = None

                self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            result = self.readbuffer
            self.readbuffer = ''
        else:
            result = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return result
643
644
645 class ZipFile:
646 """ Class with methods to open, read, write, close, list zip files.
647
648 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
649
650 file: Either the path to the file, or a file-like object.
651 If it is a path, the file will be opened and closed by ZipFile.
652 mode: The mode can be either read "r", write "w" or append "a".
653 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib) or ZIP_BZIP2 (requires bz2).
654 allowZip64: if True ZipFile will create files with ZIP64 extensions when
655 needed, otherwise it will raise an exception when this would
656 be necessary.
657
658 """
659
660 fp = None # Set here since __del__ checks it
661
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file        -- path of the archive, or a file-like object
    mode        -- "r", "w" or "a"
    compression -- ZIP_STORED, ZIP_DEFLATED or ZIP_BZIP2
    allowZip64  -- allow the ZIP64 extensions when writing

    Raises RuntimeError for an invalid mode or for a compression method
    that is unknown or whose module is missing.  In mode "r" a file that is
    not a ZIP archive raises BadZipfile; in mode "a" such a file is simply
    appended to, and a path that cannot be opened is created instead.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                  "Compression requires the (missing) bz2 module")
    else:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    # mode was validated above, so it is exactly one of "r", "w", "a"
    self.mode = key = mode
    self.pwd = None
    self.comment = ''

    # Check if we were passed a file-like object
    if isinstance(file, basestring):
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode == 'a':
                # Nothing to append to: start a new archive instead
                mode = key = 'w'
                self.fp = open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    if key == 'r':
        self._GetContents()
    elif key == 'a':
        try:                        # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:          # file is not a zip file, just append
            self.fp.seek(0, 2)
    # key == 'w': nothing to read, the archive starts out empty
724
def _GetContents(self):
    """Read the directory, making sure we close the file if reading fails.

    Delegates to _RealGetContents().  When that raises -- BadZipfile for a
    malformed archive, or any other error met while parsing -- a file that
    this object opened itself is closed, self.fp is reset to None and the
    exception is re-raised unchanged.
    """
    try:
        self._RealGetContents()
    except:
        # Cleanup only; the original exception always propagates.
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise
735
736 def _RealGetContents(self):
737 """Read in the table of contents for the ZIP file."""
738 fp = self.fp
739 endrec = _EndRecData(fp)
740 if not endrec:
741 raise BadZipfile, "File is not a zip file"
742 if self.debug > 1:
743 print endrec
744 size_cd = endrec[_ECD_SIZE] # bytes in central directory
745 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
746 self.comment = endrec[_ECD_COMMENT] # archive comment
747
748 # "concat" is zero, unless zip was concatenated to another file
749 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
750 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
751 # If Zip64 extension structures are present, account for them
752 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
753
754 if self.debug > 2:
755 inferred = concat + offset_cd
756 print "given, inferred, offset", offset_cd, inferred, concat
757 # self.start_dir: Position of start of central directory
758 self.start_dir = offset_cd + concat
759 fp.seek(self.start_dir, 0)
760 data = fp.read(size_cd)
761 fp = cStringIO.StringIO(data)
762 total = 0
763 while total < size_cd:
764 centdir = fp.read(sizeCentralDir)
765 if centdir[0:4] != stringCentralDir:
766 raise BadZipfile, "Bad magic number for central directory"
767 centdir = struct.unpack(structCentralDir, centdir)
768 if self.debug > 2:
769 print centdir
770 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
771 # Create ZipInfo instance to store file information
772 x = ZipInfo(filename)
773 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
774 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
775 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
776 (x.create_version, x.create_system, x.extract_version, x.reserved,
777 x.flag_bits, x.compress_type, t, d,
778 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
779 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
780 # Convert date/time code to (year, month, day, hour, min, sec)
781 x._raw_time = t
782 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
783 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
784
785 x._decodeExtra()
786 x.header_offset = x.header_offset + concat
787 x.filename = x._decodeFilename()
788 self.filelist.append(x)
789 self.NameToInfo[x.filename] = x
790
791 # update total bytes read from central directory
792 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
793 + centdir[_CD_EXTRA_FIELD_LENGTH]
794 + centdir[_CD_COMMENT_LENGTH])
795
796 if self.debug > 2:
797 print "total", total
798
799
def namelist(self):
    """Return the names of the archive members as a new list, in the
    order of self.filelist."""
    return [info.filename for info in self.filelist]
806
def infolist(self):
    """Return the list of ZipInfo instances describing the files in the
    archive (the internal list itself, not a copy)."""
    entries = self.filelist
    return entries
811
def printdir(self):
    """Print a table of contents for the zip file: one header line, then
    name, modification time and size of every member."""
    header = "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
    print(header)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row)
818
819 def testzip(self):
820 """Read all the files and check the CRC."""
821 chunk_size = 2 ** 20
822 for zinfo in self.filelist:
823 try:
824 # Read by chunks, to avoid an OverflowError or a
825 # MemoryError with very large embedded files.
826 f = self.open(zinfo.filename, "r")
827 while f.read(chunk_size): # Check CRC-32
828 pass
829 except BadZipfile:
830 return zinfo.filename
831
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no member called name.
    """
    try:
        return self.NameToInfo[name]
    except KeyError:
        raise KeyError(
            'There is no item named %r in the archive' % name)
840
def setpassword(self, pwd):
    """Remember pwd as the password open() falls back to for encrypted
    files when the caller supplies none."""
    self.pwd = pwd
844
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    name and pwd are handed to open(); the whole member is read at once.
    """
    member = self.open(name, "r", pwd)
    return member.read()
848
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name -- member name or ZipInfo instance
    mode -- "r", "U" or "rU"; "U" turns on universal newlines
    pwd  -- password for an encrypted member (defaults to self.pwd)

    Raises RuntimeError for a bad mode, a closed archive, a missing or
    wrong password; KeyError for an unknown member name; BadZipfile when
    the local file header is damaged or disagrees with the directory.
    A file opened here is closed again if any of these errors occurs.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
              "Attempt to read ZIP archive that was already closed")

    # Make sure we have an info object.  This is done before any file is
    # opened so that an unknown name cannot leave a handle behind.
    if isinstance(name, ZipInfo):
        # 'name' is already an info object
        zinfo = name
    else:
        # Get info object for name
        zinfo = self.getinfo(name)

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = open(self.filename, 'rb')

    try:
        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            raise BadZipfile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                  'File name in directory "%s" and header "%s" differ.' % (
                      zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                      "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            bytes = zef_file.read(12)
            h = map(zd, bytes[0:12])
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile
        if zd is None:
            zef = ZipExtFile(zef_file, zinfo)
        else:
            zef = ZipExtFile(zef_file, zinfo, zd)
    except:
        # Cleanup only: do not leak the handle we opened ourselves.
        if not self._filePassed:
            zef_file.close()
        raise

    # set universal newlines on ZipExtFile if necessary
    if "U" in mode:
        zef.set_univ_newlines(True)
    return zef
926
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    `member' may be a filename or a ZipInfo object.  The member keeps its
    full name and is placed below `path', or below the current working
    directory when `path' is None.  `pwd' is passed on for encrypted
    members.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
940
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members through extract().

    `path' names the directory to extract to (current working directory
    when None).  `members' is optional and must be a subset of the list
    returned by namelist(); every member is extracted when it is None.
    `pwd' is passed on for encrypted members.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
952
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
       file on the path targetpath.

    The member name is always treated as a path relative to targetpath:
    drive letters, leading separators and "." / ".." components are
    dropped so that an archive cannot write outside the target directory.
    Missing parent directories are created.  Returns the normalized path
    of the extracted file or directory.
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
        and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # Member names come from the archive and cannot be trusted: keep only
    # the plain name components.
    arcname = os.path.splitdrive(arcname)[1]
    unsafe = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join([part for part in arcname.split(os.path.sep)
                                if part not in unsafe])

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # A name ending in "/" denotes a directory entry
    if member.filename[-1:] == '/':
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    source = self.open(member, pwd=pwd)
    try:
        target = open(targetpath, "wb")
        try:
            shutil.copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()

    return targetpath
989
990 def _writecheck(self, zinfo):
991 """Check for errors before writing a file to the archive."""
992 if zinfo.filename in self.NameToInfo:
993 if self.debug: # Warning for duplicate names
994 print "Duplicate name:", zinfo.filename
995 if self.mode not in ("w", "a"):
996 raise RuntimeError, 'write() requires mode "w" or "a"'
997 if not self.fp:
998 raise RuntimeError, \
999 "Attempt to write ZIP archive that was already closed"
1000 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1001 raise RuntimeError, \
1002 "Compression requires the (missing) zlib module"
1003 if zinfo.compress_type == ZIP_BZIP2 and not bz2:
1004 raise RuntimeError, \
1005 "Compression requires the (missing) bz2 module"
1006 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2):
1007 raise RuntimeError, \
1008 "That compression method is not supported"
1009 if zinfo.file_size > ZIP64_LIMIT:
1010 if not self._allowZip64:
1011 raise LargeZipFile("Filesize would require ZIP64 extensions")
1012 if zinfo.header_offset > ZIP64_LIMIT:
1013 if not self._allowZip64:
1014 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
1015
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
       arcname.

       'arcname' defaults to 'filename'; in either case a drive prefix
       and leading path separators are removed.  A directory is stored
       as an empty entry whose name ends in "/".  'compress_type'
       overrides the archive's default compression for this member.

       Raises RuntimeError if the archive is already closed; further
       errors are raised by _writecheck().
    """
    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directories carry no data: only a header is written.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    fp = open(filename, "rb")
    try:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        elif zinfo.compress_type == ZIP_BZIP2:
            cmpr = bz2.BZ2Compressor()
        else:
            cmpr = None
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        # Release the input file even when reading, compressing or
        # writing to the archive fails part-way through.
        fp.close()
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
          zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1099
1100 def writestr(self, zinfo_or_arcname, bytes):
1101 """Write a file into the archive. The contents is the string
1102 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
1103 the name of the file in the archive."""
1104 if not isinstance(zinfo_or_arcname, ZipInfo):
1105 zinfo = ZipInfo(filename=zinfo_or_arcname,
1106 date_time=time.localtime(time.time())[:6])
1107 zinfo.compress_type = self.compression
1108 zinfo.external_attr = 0600 << 16
1109 else:
1110 zinfo = zinfo_or_arcname
1111
1112 if not self.fp:
1113 raise RuntimeError(
1114 "Attempt to write to ZIP archive that was already closed")
1115
1116 zinfo.file_size = len(bytes) # Uncompressed size
1117 zinfo.header_offset = self.fp.tell() # Start of header bytes
1118 self._writecheck(zinfo)
1119 self._didModify = True
1120 zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
1121 if zinfo.compress_type == ZIP_DEFLATED:
1122 co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
1123 zlib.DEFLATED, -15)
1124 bytes = co.compress(bytes) + co.flush()
1125 zinfo.compress_size = len(bytes) # Compressed size
1126 elif zinfo.compress_type == ZIP_BZIP2:
1127 co = bz2.BZ2Compressor()
1128 bytes = co.compress(bytes) + co.flush()
1129 zinfo.compress_size = len(bytes) # Compressed size
1130 else:
1131 zinfo.compress_size = zinfo.file_size
1132 zinfo.header_offset = self.fp.tell() # Start of header bytes
1133 self.fp.write(zinfo.FileHeader())
1134 self.fp.write(bytes)
1135 self.fp.flush()
1136 if zinfo.flag_bits & 0x08:
1137 # Write CRC and file sizes after the file data
1138 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
1139 zinfo.file_size))
1140 self.filelist.append(zinfo)
1141 self.NameToInfo[zinfo.filename] = zinfo
1142
1143 def __del__(self):
1144 """Call the "close()" method in case the user forgot."""
1145 self.close()
1146
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
       records.

       Calling close() on an already closed archive does nothing.  The
       central directory and the end-of-archive record(s) are written
       only when the archive was opened for writing and was modified.
       An archive comment longer than ZIP_MAX_COMMENT is truncated; a
       message is printed about that when self.debug > 0.

       self.fp is always reset to None, and the underlying file is
       closed unless it was passed in by the caller, even when writing
       the ending records raises.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values too large for their 32-bit header fields are
                # collected here and moved into a ZIP64 extra field.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                            '<HH' + 'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                     stringCentralDir, create_version,
                     zinfo.create_system, extract_version, zinfo.reserved,
                     flag_bits, zinfo.compress_type, dostime, dosdate,
                     zinfo.CRC, compress_size, file_size,
                     len(filename), len(extra_data), len(zinfo.comment),
                     0, zinfo.internal_attr, zinfo.external_attr,
                     header_offset)
                except DeprecationWarning:
                    # Dump the values that could not be packed, then
                    # let the warning-turned-error propagate.
                    details = (structCentralDir,
                     stringCentralDir, create_version,
                     zinfo.create_system, extract_version, zinfo.reserved,
                     zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                     zinfo.CRC, compress_size, file_size,
                     len(zinfo.filename), len(extra_data), len(zinfo.comment),
                     0, zinfo.internal_attr, zinfo.external_attr,
                     header_offset)
                    sys.stderr.write("%r\n" % (details,))
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                centDirOffset > ZIP64_LIMIT or
                centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, centDirCount, centDirCount,
                        centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic record stores saturated values instead.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length; a comment of exactly
            # ZIP_MAX_COMMENT bytes still fits and is left alone
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    msg = 'Archive comment is too long; truncating to %d bytes' \
                          % ZIP_MAX_COMMENT
                    print(msg)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Always detach the file object so the archive counts as closed,
        # and close it unless the caller owns it.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1256
1257
1258 class PyZipFile(ZipFile):
1259 """Class to create ZIP archives with Python library files and packages."""
1260
1261 def writepy(self, pathname, basename = ""):
1262 """Add all files from "pathname" to the ZIP archive.
1263
1264 If pathname is a package directory, search the directory and
1265 all package subdirectories recursively for all *.py and enter
1266 the modules into the archive. If pathname is a plain
1267 directory, listdir *.py and enter all modules. Else, pathname
1268 must be a Python *.py file and the module will be put into the
1269 archive. Added modules are always module.pyo or module.pyc.
1270 This method will compile the module.py into module.pyc if
1271 necessary.
1272 """
1273 dir, name = os.path.split(pathname)
1274 if os.path.isdir(pathname):
1275 initname = os.path.join(pathname, "__init__.py")
1276 if os.path.isfile(initname):
1277 # This is a package directory, add it
1278 if basename:
1279 basename = "%s/%s" % (basename, name)
1280 else:
1281 basename = name
1282 if self.debug:
1283 print "Adding package in", pathname, "as", basename
1284 fname, arcname = self._get_codename(initname[0:-3], basename)
1285 if self.debug:
1286 print "Adding", arcname
1287 self.write(fname, arcname)
1288 dirlist = os.listdir(pathname)
1289 dirlist.remove("__init__.py")
1290 # Add all *.py files and package subdirectories
1291 for filename in dirlist:
1292 path = os.path.join(pathname, filename)
1293 root, ext = os.path.splitext(filename)
1294 if os.path.isdir(path):
1295 if os.path.isfile(os.path.join(path, "__init__.py")):
1296 # This is a package directory, add it
1297 self.writepy(path, basename) # Recursive call
1298 elif ext == ".py":
1299 fname, arcname = self._get_codename(path[0:-3],
1300 basename)
1301 if self.debug:
1302 print "Adding", arcname
1303 self.write(fname, arcname)
1304 else:
1305 # This is NOT a package directory, add its files at top level
1306 if self.debug:
1307 print "Adding files from directory", pathname
1308 for filename in os.listdir(pathname):
1309 path = os.path.join(pathname, filename)
1310 root, ext = os.path.splitext(filename)
1311 if ext == ".py":
1312 fname, arcname = self._get_codename(path[0:-3],
1313 basename)
1314 if self.debug:
1315 print "Adding", arcname
1316 self.write(fname, arcname)
1317 else:
1318 if pathname[-3:] != ".py":
1319 raise RuntimeError, \
1320 'Files added with writepy() must end with ".py"'
1321 fname, arcname = self._get_codename(pathname[0:-3], basename)
1322 if self.debug:
1323 print "Adding file", arcname
1324 self.write(fname, arcname)
1325
1326 def _get_codename(self, pathname, basename):
1327 """Return (filename, archivename) for the path.
1328
1329 Given a module name path, return the correct file path and
1330 archive name, compiling if necessary. For example, given
1331 /python/lib/string, return (/python/lib/string.pyc, string).
1332 """
1333 file_py = pathname + ".py"
1334 file_pyc = pathname + ".pyc"
1335 file_pyo = pathname + ".pyo"
1336 if os.path.isfile(file_pyo) and \
1337 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
1338 fname = file_pyo # Use .pyo file
1339 elif not os.path.isfile(file_pyc) or \
1340 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
1341 import py_compile
1342 if self.debug:
1343 print "Compiling", file_py
1344 try:
1345 py_compile.compile(file_py, file_pyc, None, True)
1346 except py_compile.PyCompileError,err:
1347 print err.msg
1348 fname = file_pyc
1349 else:
1350 fname = file_pyc
1351 archivename = os.path.split(fname)[1]
1352 if basename:
1353 archivename = "%s/%s" % (basename, archivename)
1354 return (fname, archivename)
1355
1356
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

       'args' is the argument list without the program name and
       defaults to sys.argv[1:].  The first argument selects the
       action (-l, -t, -e or -c).  On an unknown action or a wrong
       number of arguments the usage text is printed and the process
       exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # NOTE(review): assumes testzip() returns the name of the
            # first corrupt member, or None -- as the standard library's
            # zipfile does; confirm against this file's implementation.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        try:
            out = args[2]
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                if path.endswith('/'):
                    # Directory entry: create it, there is no data to write.
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is empty when extracting straight into ''.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively the files below a directory.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            # Also finalise the archive when adding a source fails.
            zf.close()
1429
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()