comparison zipfiles/zipfile26.py @ 153:45d4a9dc707b

Moved everything to do with zipfile to a new folder named zipfiles
author Oleg Oshmyan <chortos@inbox.lv>
date Fri, 03 Jun 2011 20:22:08 +0100
parents zipfile26.py@4ea7133ac25c
children
comparison
equal deleted inserted replaced
152:7951219d9866 153:45d4a9dc707b
1 """
2 Read and write ZIP files.
3 """
4 # Improved by Chortos-2 in 2009 and 2010 (added bzip2 support)
5 import struct, os, time, sys, shutil
6 import binascii, cStringIO, stat
7
8 try:
9 import zlib # We may need its compression method
10 crc32 = zlib.crc32
11 except ImportError:
12 zlib = None
13 crc32 = binascii.crc32
14
15 try:
16 import bz2 # We may need its compression method
17 except ImportError:
18 bz2 = None
19
20 __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
21 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile", "ZIP_BZIP2" ]
22
class BadZipfile(Exception):
    """Error raised by this module when an archive cannot be parsed."""
25
26
class LargeZipFile(Exception):
    """Signal that ZIP64 extensions are needed but have been disabled.

    Raised while writing a zipfile when the archive (or one of its
    members) requires ZIP64 extensions and those extensions are turned off.
    """
32
33 error = BadZipfile # The exception raised by this module
34
35 ZIP64_LIMIT = (1 << 31) - 1
36 ZIP_FILECOUNT_LIMIT = 1 << 16
37 ZIP_MAX_COMMENT = (1 << 16) - 1
38
39 # constants for Zip file compression methods
40 ZIP_STORED = 0
41 ZIP_DEFLATED = 8
42 ZIP_BZIP2 = 12
43 # Other ZIP compression methods not supported
44
45 # Below are some formats and associated data for reading/writing headers using
46 # the struct module. The names and structures of headers/records are those used
47 # in the PKWARE description of the ZIP file format:
48 # http://www.pkware.com/documents/casestudies/APPNOTE.TXT
49 # (URL valid as of January 2008)
50
51 # The "end of central directory" structure, magic number, size, and indices
52 # (section V.I in the format document)
53 structEndArchive = "<4s4H2LH"
54 stringEndArchive = "PK\005\006"
55 sizeEndCentDir = struct.calcsize(structEndArchive)
56
57 _ECD_SIGNATURE = 0
58 _ECD_DISK_NUMBER = 1
59 _ECD_DISK_START = 2
60 _ECD_ENTRIES_THIS_DISK = 3
61 _ECD_ENTRIES_TOTAL = 4
62 _ECD_SIZE = 5
63 _ECD_OFFSET = 6
64 _ECD_COMMENT_SIZE = 7
65 # These last two indices are not part of the structure as defined in the
66 # spec, but they are used internally by this module as a convenience
67 _ECD_COMMENT = 8
68 _ECD_LOCATION = 9
69
70 # The "central directory" structure, magic number, size, and indices
71 # of entries in the structure (section V.F in the format document)
72 structCentralDir = "<4s4B4HL2L5H2L"
73 stringCentralDir = "PK\001\002"
74 sizeCentralDir = struct.calcsize(structCentralDir)
75
76 # indexes of entries in the central directory structure
77 _CD_SIGNATURE = 0
78 _CD_CREATE_VERSION = 1
79 _CD_CREATE_SYSTEM = 2
80 _CD_EXTRACT_VERSION = 3
81 _CD_EXTRACT_SYSTEM = 4
82 _CD_FLAG_BITS = 5
83 _CD_COMPRESS_TYPE = 6
84 _CD_TIME = 7
85 _CD_DATE = 8
86 _CD_CRC = 9
87 _CD_COMPRESSED_SIZE = 10
88 _CD_UNCOMPRESSED_SIZE = 11
89 _CD_FILENAME_LENGTH = 12
90 _CD_EXTRA_FIELD_LENGTH = 13
91 _CD_COMMENT_LENGTH = 14
92 _CD_DISK_NUMBER_START = 15
93 _CD_INTERNAL_FILE_ATTRIBUTES = 16
94 _CD_EXTERNAL_FILE_ATTRIBUTES = 17
95 _CD_LOCAL_HEADER_OFFSET = 18
96
97 # The "local file header" structure, magic number, size, and indices
98 # (section V.A in the format document)
99 structFileHeader = "<4s2B4HL2L2H"
100 stringFileHeader = "PK\003\004"
101 sizeFileHeader = struct.calcsize(structFileHeader)
102
103 _FH_SIGNATURE = 0
104 _FH_EXTRACT_VERSION = 1
105 _FH_EXTRACT_SYSTEM = 2
106 _FH_GENERAL_PURPOSE_FLAG_BITS = 3
107 _FH_COMPRESSION_METHOD = 4
108 _FH_LAST_MOD_TIME = 5
109 _FH_LAST_MOD_DATE = 6
110 _FH_CRC = 7
111 _FH_COMPRESSED_SIZE = 8
112 _FH_UNCOMPRESSED_SIZE = 9
113 _FH_FILENAME_LENGTH = 10
114 _FH_EXTRA_FIELD_LENGTH = 11
115
116 # The "Zip64 end of central directory locator" structure, magic number, and size
117 structEndArchive64Locator = "<4sLQL"
118 stringEndArchive64Locator = "PK\x06\x07"
119 sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
120
121 # The "Zip64 end of central directory" record, magic number, size, and indices
122 # (section V.G in the format document)
123 structEndArchive64 = "<4sQ2H2L4Q"
124 stringEndArchive64 = "PK\x06\x06"
125 sizeEndCentDir64 = struct.calcsize(structEndArchive64)
126
127 _CD64_SIGNATURE = 0
128 _CD64_DIRECTORY_RECSIZE = 1
129 _CD64_CREATE_VERSION = 2
130 _CD64_EXTRACT_VERSION = 3
131 _CD64_DISK_NUMBER = 4
132 _CD64_DISK_NUMBER_START = 5
133 _CD64_NUMBER_ENTRIES_THIS_DISK = 6
134 _CD64_NUMBER_ENTRIES_TOTAL = 7
135 _CD64_DIRECTORY_SIZE = 8
136 _CD64_OFFSET_START_CENTDIR = 9
137
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename -- path of the file to inspect.

    Returns True when _EndRecData() finds an "end of central directory"
    record in the file, and False otherwise, including when the file
    cannot be opened or read (IOError).
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even when parsing the end record fails.
            fpin.close()
    except IOError:
        return False
    # A non-empty record means the file has the correct magic number.
    return bool(endrec)
149
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use that to update endrec.

    fpin   -- seekable file object for the archive.
    offset -- position of the ordinary end-of-central-directory record,
              expressed relative to the end of the file (negative).
    endrec -- list produced by _EndRecData(); updated in place.

    Returns endrec. It is returned unchanged when no ZIP64 locator or
    record is present, or when the file is too small to hold them.
    Raises BadZipfile for archives that span multiple disks.
    """
    try:
        fpin.seek(offset - sizeEndCentDir64Locator, 2)
    except IOError:
        # The file is too small to contain a ZIP64 locator before the
        # end record, so the record we were given is all there is.
        return endrec
    data = fpin.read(sizeEndCentDir64Locator)
    if len(data) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    try:
        fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2)
    except IOError:
        return endrec
    data = fpin.read(sizeEndCentDir64)
    if len(data) != sizeEndCentDir64:
        return endrec
    (sig, sz, create_version, read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = sig
    endrec[_ECD_DISK_NUMBER] = disk_num
    endrec[_ECD_DISK_START] = disk_dir
    endrec[_ECD_ENTRIES_THIS_DISK] = dircount
    endrec[_ECD_ENTRIES_TOTAL] = dircount2
    endrec[_ECD_SIZE] = dirsize
    endrec[_ECD_OFFSET] = diroffset
    return endrec
181
182
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    fpin -- seekable file object for the archive.

    The result is a list: the fields unpacked with structEndArchive,
    followed by the archive comment and finally the file offset of the
    record itself. None is returned when no valid record is found.
    """
    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    data = fpin.read()
    if (len(data) == sizeEndCentDir and
            data[0:4] == stringEndArchive and
            data[-2:] == "\000\000"):
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, data))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment. Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may
    # be up to 64K long. It is assumed that the "end of central directory"
    # magic number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:
        # found the magic number; attempt to unpack and interpret
        recData = data[start:start + sizeEndCentDir]
        if len(recData) != sizeEndCentDir:
            # The signature sits too close to EOF for a whole record.
            return None
        endrec = list(struct.unpack(structEndArchive, recData))
        comment = data[start + sizeEndCentDir:]
        # check that comment length is correct
        if endrec[_ECD_COMMENT_SIZE] == len(comment):
            # Append the archive comment and start offset
            endrec.append(comment)
            endrec.append(maxCommentStart + start)

            # Try to read the "Zip64 end of central directory" structure
            return _EndRecData64(fpin, maxCommentStart + start - filesize,
                                 endrec)

    # Unable to find a valid end of central directory structure
    return None
239
240
class ZipInfo(object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named `filename` stamped with `date_time`.

        filename  -- member name; cut at the first null byte and with
                     os.sep replaced by "/" for the `filename` attribute.
        date_time -- (year, month, day, hour, min, sec) tuple.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        When either size exceeds ZIP64_LIMIT a ZIP64 extra record is
        appended and extract_version/create_version are raised to at
        least 45 as a side effect.
        """
        dt = self.date_time
        # Pack the timestamp into the DOS date and time bit fields.
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        Unicode names that are not pure ASCII are encoded as UTF-8 and
        flag bit 0x800 is set; other names are passed through unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 when flag 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply ZIP64 values found in the extra field to this entry.

        Walks the (type, length, payload) records in self.extra. For a
        type 1 record the 64-bit values replace file_size, compress_size
        and header_offset wherever those hold the 0xffffffff placeholder.
        Raises RuntimeError for a type 1 record of unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # A record needs at least a 4-byte header; shorter leftovers are
        # ignored instead of making struct.unpack fail.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
388
389
390 class _ZipDecrypter:
391 """Class to handle decryption of files stored within a ZIP archive.
392
393 ZIP supports a password-based form of encryption. Even though known
394 plaintext attacks have been found against it, it is still useful
395 to be able to get data out of such a file.
396
397 Usage:
398 zd = _ZipDecrypter(mypwd)
399 plain_char = zd(cypher_char)
400 plain_text = map(zd, cypher_text)
401 """
402
403 def _GenerateCRCTable():
404 """Generate a CRC-32 table.
405
406 ZIP encryption uses the CRC32 one-byte primitive for scrambling some
407 internal keys. We noticed that a direct implementation is faster than
408 relying on binascii.crc32().
409 """
410 poly = 0xedb88320
411 table = [0] * 256
412 for i in range(256):
413 crc = i
414 for j in range(8):
415 if crc & 1:
416 crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
417 else:
418 crc = ((crc >> 1) & 0x7FFFFFFF)
419 table[i] = crc
420 return table
421 crctable = _GenerateCRCTable()
422
423 def _crc32(self, ch, crc):
424 """Compute the CRC32 primitive on one byte."""
425 return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
426
427 def __init__(self, pwd):
428 self.key0 = 305419896
429 self.key1 = 591751049
430 self.key2 = 878082192
431 for p in pwd:
432 self._UpdateKeys(p)
433
434 def _UpdateKeys(self, c):
435 self.key0 = self._crc32(c, self.key0)
436 self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
437 self.key1 = (self.key1 * 134775813 + 1) & 4294967295
438 self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
439
440 def __call__(self, c):
441 """Decrypt a single character."""
442 c = ord(c)
443 k = self.key2 | 2
444 c = c ^ (((k * (k^1)) >> 8) & 255)
445 c = chr(c)
446 self._UpdateKeys(c)
447 return c
448
class ZipExtFile:
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    def __init__(self, fileobj, zipinfo, decrypt=None):
        """Wrap `fileobj`, positioned at the start of the member's data.

        fileobj -- file object to read the raw member bytes from.
        zipinfo -- entry describing the member; compress_type,
                   compress_size and filename are read from it.
        decrypt -- optional callable applied to every raw character.
        """
        self.fileobj = fileobj
        self.decrypter = decrypt
        self.bytes_read = 0
        self.rawbuffer = ''
        self.readbuffer = ''
        self.linebuffer = ''
        self.eof = False
        self.univ_newlines = False
        self.nlSeps = ("\n", )
        self.lastdiscard = ''

        self.compress_type = zipinfo.compress_type
        self.compress_size = zipinfo.compress_size

        self.closed = False
        self.mode = "r"
        self.name = zipinfo.filename

        # read from compressed files in 64k blocks
        self.compreadsize = 64*1024
        if self.compress_type == ZIP_DEFLATED:
            self.dc = zlib.decompressobj(-15)
        elif self.compress_type == ZIP_BZIP2:
            self.dc = bz2.BZ2Decompressor()
            self.compreadsize = 900000

    def set_univ_newlines(self, univ_newlines):
        """Enable or disable universal newline handling for readline()."""
        self.univ_newlines = univ_newlines

        # pick line separator char(s) based on universal newlines flag
        self.nlSeps = ("\n", )
        if self.univ_newlines:
            self.nlSeps = ("\r\n", "\r", "\n")

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of data."""
        nextline = self.readline()
        if not nextline:
            raise StopIteration()

        return nextline

    def close(self):
        """Mark this object as closed (the underlying file is untouched)."""
        self.closed = True

    def _checkfornewline(self):
        """Return (index, length) of the first separator in linebuffer.

        Separators are tried in the order listed in self.nlSeps; (-1, -1)
        is returned when none of them occurs.
        """
        nl, nllen = -1, -1
        if self.linebuffer:
            # ugly check for cases where half of an \r\n pair was
            # read on the last pass, and the \r was discarded.  In this
            # case we just throw away the \n at the start of the buffer.
            if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
                self.linebuffer = self.linebuffer[1:]

            for sep in self.nlSeps:
                nl = self.linebuffer.find(sep)
                if nl >= 0:
                    nllen = len(sep)
                    return nl, nllen

        return nl, nllen

    def readline(self, size = -1):
        """Read a line with approx. size. If size is negative,
           read a whole line.

        A complete line is returned with "\\n" as its terminator. When
        the size limit is reached before the separator, only the first
        `size` characters are returned and nothing is discarded.
        """
        if size < 0:
            size = sys.maxint
        elif size == 0:
            return ''

        # check for a newline already in buffer
        nl, nllen = self._checkfornewline()

        if nl >= 0:
            # the next line was already in the buffer
            if size < nl:
                # The limit falls before the separator: return exactly
                # `size` characters and keep the remainder, separator
                # included, for the next call.
                piece = self.linebuffer[:size]
                self.linebuffer = self.linebuffer[size:]
                # No separator was dropped, so there is no pending "\r".
                self.lastdiscard = ''
                return piece
        else:
            # no line break in buffer - try to read more
            size -= len(self.linebuffer)
            while nl < 0 and size > 0:
                buf = self.read(min(size, 100))
                if not buf:
                    break
                self.linebuffer += buf
                size -= len(buf)

                # check for a newline in buffer
                nl, nllen = self._checkfornewline()

            # we either ran out of bytes in the file, or
            # met the specified size limit without finding a newline,
            # so return current buffer
            if nl < 0:
                s = self.linebuffer
                self.linebuffer = ''
                return s

        buf = self.linebuffer[:nl]
        self.lastdiscard = self.linebuffer[nl:nl + nllen]
        self.linebuffer = self.linebuffer[nl + nllen:]

        # line is always returned with \n as newline char (except possibly
        # for a final incomplete line in the file, which is handled above).
        return buf + "\n"

    def readlines(self, sizehint = -1):
        """Return a list with all (following) lines. The sizehint parameter
        is ignored in this implementation.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def read(self, size = None):
        """Return up to `size` characters of member data.

        With size None or negative, everything obtained by this call
        plus any buffered data is returned. An empty string is returned
        when no more data is available.
        """
        # act like file() obj and return empty string if size is 0
        if size == 0:
            return ''

        # A negative size means "no limit", as with file.read(); without
        # this the final slice below would drop the last character.
        if size is not None and size < 0:
            size = None

        # determine read size
        bytesToRead = self.compress_size - self.bytes_read

        # adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information
        if self.decrypter is not None:
            bytesToRead -= 12

        if size is not None:
            if self.compress_type == ZIP_STORED:
                lr = len(self.readbuffer)
                bytesToRead = min(bytesToRead, size - lr)
            else:
                if len(self.readbuffer) > size:
                    # the user has requested fewer bytes than we've already
                    # pulled through the decompressor; don't read any more
                    bytesToRead = 0
                else:
                    # user will use up the buffer, so read some more
                    lr = len(self.rawbuffer)
                    bytesToRead = min(bytesToRead, self.compreadsize - lr)

        # avoid reading past end of file contents
        if bytesToRead + self.bytes_read > self.compress_size:
            bytesToRead = self.compress_size - self.bytes_read

        # try to read from file (if necessary)
        if bytesToRead > 0:
            chunk = self.fileobj.read(bytesToRead)
            self.bytes_read += len(chunk)
            self.rawbuffer += chunk

        # handle contents of raw buffer
        if self.rawbuffer:
            newdata = self.rawbuffer
            self.rawbuffer = ''

            # decrypt new data if we were given an object to handle that
            if newdata and self.decrypter is not None:
                newdata = ''.join(map(self.decrypter, newdata))

            # decompress newly read data if necessary
            if newdata and self.compress_type != ZIP_STORED:
                newdata = self.dc.decompress(newdata)
                # only the deflate decompressor exposes unconsumed_tail
                self.rawbuffer = self.dc.unconsumed_tail if self.compress_type == ZIP_DEFLATED else ''
                # NOTE(review): nothing in this class sets self.eof to
                # True, so this flush runs only if a caller sets it.
                if self.eof and len(self.rawbuffer) == 0:
                    # we're out of raw bytes (both from the file and
                    # the local buffer); flush just to make sure the
                    # decompressor is done
                    try:
                        newdata += self.dc.flush()
                    except AttributeError:
                        pass
                    # prevent decompressor from being used again
                    self.dc = None

            self.readbuffer += newdata

        # return what the user asked for
        if size is None or len(self.readbuffer) <= size:
            result = self.readbuffer
            self.readbuffer = ''
        else:
            result = self.readbuffer[:size]
            self.readbuffer = self.readbuffer[size:]

        return result
647
648
649 class ZipFile:
650 """ Class with methods to open, read, write, close, list zip files.
651
652 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
653
654 file: Either the path to the file, or a file-like object.
655 If it is a path, the file will be opened and closed by ZipFile.
656 mode: The mode can be either read "r", write "w" or append "a".
657 compression: ZIP_STORED (no compression), ZIP_DEFLATED (requires zlib),
658 or ZIP_BZIP2 (requires bz2).
659 allowZip64: if True ZipFile will create files with ZIP64 extensions when
660 needed, otherwise it will raise an exception when this would
661 be necessary.
662
663 """
664
665 fp = None # Set here since __del__ checks it
666
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    Method of ZipFile.

    file        -- path of the archive, or a file-like object.
    mode        -- "r", "w" or "a"; "a" on a path that cannot be opened
                   falls back to creating the file.
    compression -- ZIP_STORED, ZIP_DEFLATED (requires zlib) or ZIP_BZIP2
                   (requires bz2).
    allowZip64  -- stored for the write checks that need ZIP64.

    Raises RuntimeError for an invalid mode, an unsupported compression
    method or a missing compression module.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_STORED:
        pass
    elif compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression == ZIP_BZIP2:
        if not bz2:
            raise RuntimeError(
                "Compression requires the (missing) bz2 module")
    else:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0  # Level of printing: 0 through 3
    self.NameToInfo = {}    # Find file info given name
    self.filelist = []      # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self.comment = ''

    # Check if we were passed a file-like object
    if isinstance(file, basestring):
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode == 'a':
                # Opening for update failed: create the file instead.
                mode = key = 'w'
                self.fp = open(file, modeDict[mode])
            else:
                raise
    else:
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)

    if key == 'r':
        self._GetContents()
    elif key == 'w':
        pass
    elif key == 'a':
        try:                        # See if file is a zip file
            self._RealGetContents()
            # seek to start of directory and overwrite
            self.fp.seek(self.start_dir, 0)
        except BadZipfile:          # file is not a zip file, just append
            self.fp.seek(0, 2)
    else:
        # Defensive only: mode was validated at the top of this method.
        if not self._filePassed:
            self.fp.close()
            self.fp = None
        raise RuntimeError('Mode must be "r", "w" or "a"')
729
def _GetContents(self):
    """Load the archive directory, releasing our own file on bad input.

    Method of ZipFile. Delegates to _RealGetContents(). If that raises
    BadZipfile and the file was opened by this object rather than passed
    in, the file is closed and self.fp reset before re-raising.
    """
    try:
        self._RealGetContents()
    except BadZipfile:
        opened_here = not self._filePassed
        if opened_here:
            self.fp.close()
            self.fp = None
        raise
740
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Method of ZipFile. Fills self.filelist and self.NameToInfo with one
    ZipInfo per central directory entry, and sets self.comment and
    self.start_dir. Raises BadZipfile when no end record is found or a
    central directory entry is malformed or truncated.
    """
    fp = self.fp
    endrec = _EndRecData(fp)
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self.comment = endrec[_ECD_COMMENT]     # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset %s %s %s"
              % (offset_cd, inferred, concat))
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # Parse the directory from memory instead of the archive file.
    cd_stream = cStringIO.StringIO(data)
    total = 0
    while total < size_cd:
        centdir = cd_stream.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if len(centdir) != sizeCentralDir:
            # Right signature but not enough bytes for a whole entry.
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print(centdir)
        filename = cd_stream.read(centdir[_CD_FILENAME_LENGTH])
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = cd_stream.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = cd_stream.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
            x.flag_bits, x.compress_type, t, d,
            x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        x.filename = x._decodeFilename()
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total %s" % (total,))
803
804
def namelist(self):
    """Return a list of file names in the archive.

    Method of ZipFile. A new list is built on every call, holding the
    `filename` of each entry in self.filelist, in the same order.
    """
    return [info.filename for info in self.filelist]
811
def infolist(self):
    """Return the ZipInfo instances describing the archive's files.

    Method of ZipFile. The internal self.filelist object itself is
    returned, not a copy.
    """
    entries = self.filelist
    return entries
816
def printdir(self):
    """Print a table of contents for the zip file.

    Method of ZipFile. Writes a header line, then one line per entry in
    self.filelist with its name, modification time and file size.
    """
    header = "%-46s %19s %12s" % ("File Name", "Modified    ", "Size")
    print(header)
    for zinfo in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
        row = "%-46s %s %12d" % (zinfo.filename, stamp, zinfo.file_size)
        print(row)
823
def testzip(self):
    """Read all the files and report the first one that fails.

    Method of ZipFile. Every member is opened and read to the end in
    chunks. Returns the name of the first member whose reading raises
    BadZipfile, or None when every member reads cleanly.
    NOTE(review): the original comment says this checks the CRC-32;
    that depends on the object returned by self.open() -- confirm.
    """
    chunk_size = 2 ** 20
    for zinfo in self.filelist:
        try:
            # Read by chunks, to avoid an OverflowError or a
            # MemoryError with very large embedded files.
            f = self.open(zinfo.filename, "r")
            try:
                while f.read(chunk_size):
                    pass
            finally:
                # Close the member object whether or not reading worked.
                f.close()
        except BadZipfile:
            return zinfo.filename
    return None
836
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Method of ZipFile. Looks `name` up in self.NameToInfo and raises
    KeyError when the archive has no such item.
    """
    found = self.NameToInfo.get(name)
    if found is not None:
        return found
    raise KeyError(
        'There is no item named %r in the archive' % name)
845
def setpassword(self, pwd):
    """Set default password for encrypted files.

    Method of ZipFile. The value is stored in self.pwd and used by
    open() when no explicit password is given.
    """
    self.pwd = pwd
849
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    Method of ZipFile. Opens the member in mode "r" with the given
    password and returns everything its read() yields.
    """
    member = self.open(name, "r", pwd)
    return member.read()
853
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    Method of ZipFile.

    name -- member name or ZipInfo instance.
    mode -- "r", "U" or "rU"; a "U" turns on universal newlines.
    pwd  -- password for an encrypted member; self.pwd is used if empty.

    Raises RuntimeError for a bad mode, a closed archive, a missing or
    wrong password; BadZipfile for a corrupt or truncated local header;
    KeyError (from getinfo) for an unknown member name. A file opened
    here is closed again if any of these occur.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    if self._filePassed:
        zef_file = self.fp
    else:
        zef_file = open(self.filename, 'rb')

    try:
        # Make sure we have an info object
        if isinstance(name, ZipInfo):
            # 'name' is already an info object
            zinfo = name
        else:
            # Get info object for name
            zinfo = self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = zef_file.read(sizeFileHeader)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")
        if len(fheader) != sizeFileHeader:
            # Right signature but the header is cut short.
            raise BadZipfile("Truncated file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        is_encrypted = zinfo.flag_bits & 0x1
        zd = None
        if is_encrypted:
            if not pwd:
                pwd = self.pwd
            if not pwd:
                raise RuntimeError("File %s is encrypted, "
                                   "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            enc_header = zef_file.read(12)
            if len(enc_header) != 12:
                raise BadZipfile("Truncated encryption header")
            h = [zd(c) for c in enc_header]
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(h[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        # build and return a ZipExtFile (zd stays None when unencrypted)
        zef = ZipExtFile(zef_file, zinfo, zd)

        # set universal newlines on ZipExtFile if necessary
        if "U" in mode:
            zef.set_univ_newlines(True)
        return zef
    except:
        # Do not leak the handle we opened ourselves; re-raise unchanged.
        if not self._filePassed:
            zef_file.close()
        raise
931
def extract(self, member, path=None, pwd=None):
    """Extract one member and return the path it was written to.

    Method of ZipFile. `member' may be a filename or a ZipInfo object;
    a filename is resolved through getinfo(). The member is extracted
    under its full name into `path', or into the current working
    directory when `path' is None. `pwd' is passed on for encrypted
    members.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
945
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each of them.

    Method of ZipFile. `members' defaults to everything returned by
    namelist(); when given it must be a subset of that list. `path' and
    `pwd' are handed to extract() unchanged, so a `path' of None means
    the current working directory.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
957
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical
    file below the directory targetpath.

    The member name is sanitised before it is joined to targetpath: a
    drive prefix, leading separators and any "." or ".." components are
    dropped, so an archive cannot place files outside targetpath.
    Missing parent directories are created.  A member whose name ends
    with "/" is created as a directory instead of a file.

    `pwd' is passed to self.open() unchanged.  Returns the normalised
    path of the file or directory that was created.
    """
    # Strip trailing path separator, unless it represents the root.
    if (targetpath[-1:] in (os.path.sep, os.path.altsep)
            and len(os.path.splitdrive(targetpath)[1]) > 1):
        targetpath = targetpath[:-1]

    # Build the member's relative path using platform separators and
    # discard everything that could climb out of targetpath.
    arcname = member.filename.replace('/', os.path.sep)
    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    arcname = os.path.splitdrive(arcname)[1]
    unsafe = ('', os.path.curdir, os.path.pardir)
    arcname = os.path.sep.join(
        [part for part in arcname.split(os.path.sep) if part not in unsafe])

    targetpath = os.path.normpath(os.path.join(targetpath, arcname))

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # endswith() rather than [-1] so an empty member name cannot raise
    # IndexError here.
    if member.filename.endswith('/'):
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    # Close both ends even when the copy fails part-way through.
    source = self.open(member, pwd=pwd)
    try:
        target = open(targetpath, "wb")
        try:
            shutil.copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()

    return targetpath
994
def _writecheck(self, zinfo):
    """Check for errors before writing a file to the archive.

    A duplicate member name is only reported (when self.debug is set);
    it is not an error.  Raises RuntimeError when the archive is not open
    in mode "w" or "a", when it has already been closed, when the
    requested compression method is unsupported or when the module it
    needs is missing.  Raises LargeZipFile when the member size or its
    header offset exceeds ZIP64_LIMIT and ZIP64 extensions are disabled.
    """
    if zinfo.filename in self.NameToInfo:
        if self.debug:      # Warning for duplicate names
            print("Duplicate name: %s" % (zinfo.filename,))
    if self.mode not in ("w", "a"):
        raise RuntimeError('write() requires mode "w" or "a"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to write ZIP archive that was already closed")
    if zinfo.compress_type == ZIP_DEFLATED and not zlib:
        raise RuntimeError(
            "Compression requires the (missing) zlib module")
    if zinfo.compress_type == ZIP_BZIP2 and not bz2:
        raise RuntimeError(
            "Compression requires the (missing) bz2 module")
    if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED, ZIP_BZIP2):
        raise RuntimeError(
            "That compression method is not supported")
    if zinfo.file_size > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")
    if zinfo.header_offset > ZIP64_LIMIT and not self._allowZip64:
        raise LargeZipFile("Zipfile size would require ZIP64 extensions")
1020
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    `arcname' defaults to `filename'; its drive prefix and leading path
    separators are removed.  `compress_type' defaults to the archive's
    own compression setting.  A directory is stored as an empty entry
    whose name ends with "/".

    Raises RuntimeError if the archive has already been closed, plus
    anything raised by _writecheck() or by reading `filename'.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    mtime = time.localtime(st.st_mtime)
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader())
        return

    fp = open(filename, "rb")
    try:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        zinfo.file_size = file_size = 0
        self.fp.write(zinfo.FileHeader())
        if zinfo.compress_type == ZIP_DEFLATED:
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        elif zinfo.compress_type == ZIP_BZIP2:
            cmpr = bz2.BZ2Compressor()
        else:
            cmpr = None
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    finally:
        # Release the input file even if reading or compressing failed.
        fp.close()
    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    # Seek backwards and write CRC and file sizes
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset + 14, 0)
    self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1104
def writestr(self, zinfo_or_arcname, bytes):
    """Store the string 'bytes' in the archive as a new member.

    'zinfo_or_arcname' is either a ZipInfo instance, which is used as
    given, or the member name.  For a name, a ZipInfo is created with
    the current local time, the archive's compression setting and
    permission bits 0600.

    Raises RuntimeError if the archive has already been closed, plus
    anything raised by _writecheck().
    """
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(bytes) & 0xffffffff   # CRC-32 checksum

    # Pick the compressor first, then compress in one place.
    method = zinfo.compress_type
    if method == ZIP_DEFLATED:
        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                      zlib.DEFLATED, -15)
    elif method == ZIP_BZIP2:
        compressor = bz2.BZ2Compressor()
    else:
        compressor = None

    if compressor is None:
        zinfo.compress_size = zinfo.file_size
    else:
        bytes = compressor.compress(bytes) + compressor.flush()
        zinfo.compress_size = len(bytes)    # Compressed size

    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    out = self.fp
    out.write(zinfo.FileHeader())
    out.write(bytes)
    out.flush()
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        out.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size,
                              zinfo.file_size))
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1147
def __del__(self):
    """Call the "close()" method in case the user forgot."""
    # close() returns immediately when self.fp is already None, so an
    # archive that was closed explicitly is not finalised a second time.
    self.close()
1151
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    When the archive was modified, the central directory, the ZIP64
    end-of-archive records (only if the entry count, directory offset or
    directory size require them) and the end-of-archive record with the
    archive comment are written first.  An over-long comment is truncated
    to ZIP_MAX_COMMENT bytes.

    self.fp is always reset to None, and the underlying file is always
    closed unless it was passed in by the caller, even if writing the
    ending records raises.  Calling close() again is a no-op.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify:
            # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []
                if (zinfo.file_size > ZIP64_LIMIT
                        or zinfo.compress_size > ZIP64_LIMIT):
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                        '<HH' + 'Q' * len(extra),
                        1, 8 * len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version,
                        zinfo.reserved, flag_bits, zinfo.compress_type,
                        dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values that could not be packed, then let
                    # the warning propagate.
                    sys.stderr.write("%s\n" % ((
                        structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version,
                        zinfo.reserved, zinfo.flag_bits,
                        zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data),
                        len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset),))
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                    centDirOffset > ZIP64_LIMIT or
                    centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                    structEndArchive64, stringEndArchive64,
                    44, 45, 45, 0, 0, centDirCount, centDirCount,
                    centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                    structEndArchive64Locator,
                    stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            # check for valid comment length; a comment of exactly
            # ZIP_MAX_COMMENT bytes still fits and needs no truncation.
            if len(self.comment) > ZIP_MAX_COMMENT:
                if self.debug > 0:
                    print('Archive comment is too long; truncating to %d bytes'
                          % ZIP_MAX_COMMENT)
                self.comment = self.comment[:ZIP_MAX_COMMENT]

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset,
                                 len(self.comment))
            self.fp.write(endrec)
            self.fp.write(self.comment)
            self.fp.flush()
    finally:
        # Detach the handle first so a failure above cannot leave the
        # object half-open and make __del__ retry the whole write.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1261
1262
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename=""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        `basename' is the archive directory the modules are stored under;
        it is extended with the package name while descending into
        packages.  Raises RuntimeError if pathname is a file that does
        not end with ".py".
        """
        if not os.path.isdir(pathname):
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            self._add_module(pathname[0:-3], basename, "Adding file")
            return

        initname = os.path.join(pathname, "__init__.py")
        if os.path.isfile(initname):
            # This is a package directory, add it
            name = os.path.split(pathname)[1]
            if basename:
                basename = "%s/%s" % (basename, name)
            else:
                basename = name
            if self.debug:
                print("Adding package in %s as %s" % (pathname, basename))
            self._add_module(initname[0:-3], basename)
            dirlist = os.listdir(pathname)
            dirlist.remove("__init__.py")
            # Add all *.py files and package subdirectories
            for filename in dirlist:
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if os.path.isdir(path):
                    if os.path.isfile(os.path.join(path, "__init__.py")):
                        # This is a package directory, add it
                        self.writepy(path, basename)  # Recursive call
                elif ext == ".py":
                    self._add_module(path[0:-3], basename)
        else:
            # This is NOT a package directory, add its files at top level
            if self.debug:
                print("Adding files from directory %s" % (pathname,))
            for filename in os.listdir(pathname):
                path = os.path.join(pathname, filename)
                ext = os.path.splitext(filename)[1]
                if ext == ".py":
                    self._add_module(path[0:-3], basename)

    def _add_module(self, modpath, basename, label="Adding"):
        """Write the compiled file for the module at modpath (no suffix).

        `label' is the prefix of the debug message printed before the
        archive name when self.debug is set.
        """
        fname, arcname = self._get_codename(modpath, basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string.pyc).
        The archive name is prefixed with "basename/" when basename is
        not empty.

        An up-to-date .pyo file is preferred.  Otherwise the .pyc file is
        used, after being (re)compiled when it is missing or older than
        the .py file.  A compile error is printed, not raised.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        elif not os.path.isfile(file_pyc) or \
                os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
            import py_compile
            if self.debug:
                print("Compiling %s" % (file_py,))
            try:
                py_compile.compile(file_py, file_pyc, None, True)
            except py_compile.PyCompileError as err:
                print(err.msg)
            fname = file_pyc
        else:
            fname = file_pyc
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
1360
1361
def main(args=None):
    """Command-line interface: list, test, extract or create a zipfile.

    `args' is the argument list without the program name and defaults to
    sys.argv[1:].  The first argument selects the action (-l, -t, -e or
    -c).  Prints the usage text and exits with status 1 when the action
    is unknown or the number of arguments does not fit it.
    """
    import textwrap
    USAGE = textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    def usage_exit():
        print(USAGE)
        sys.exit(1)

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        usage_exit()

    option = args[0]
    if option == '-l':
        if len(args) != 2:
            usage_exit()
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif option == '-t':
        if len(args) != 2:
            usage_exit()
        zf = ZipFile(args[1], 'r')
        try:
            badfile = zf.testzip()
        finally:
            zf.close()
        # Report the outcome of the test instead of discarding it.
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif option == '-e':
        if len(args) != 3:
            usage_exit()
        zf = ZipFile(args[1], 'r')
        try:
            # extractall() also copes with directory entries and streams
            # each member instead of reading it into memory.
            zf.extractall(args[2])
        finally:
            zf.close()

    elif option == '-c':
        if len(args) < 3:
            usage_exit()

        def addToZip(zf, path, zippath):
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm),
                             os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
1434
# Run the command-line interface only when this file is executed as a
# script; importing the module does not call main().
if __name__ == "__main__":
    main()