|
1 #!/usr/bin/env python |
|
2 # -*- coding: iso-8859-1 -*- |
|
3 #------------------------------------------------------------------- |
|
4 # tarfile.py |
|
5 #------------------------------------------------------------------- |
|
6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de> |
|
7 # All rights reserved. |
|
8 # |
|
9 # Permission is hereby granted, free of charge, to any person |
|
10 # obtaining a copy of this software and associated documentation |
|
11 # files (the "Software"), to deal in the Software without |
|
12 # restriction, including without limitation the rights to use, |
|
13 # copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
14 # copies of the Software, and to permit persons to whom the |
|
15 # Software is furnished to do so, subject to the following |
|
16 # conditions: |
|
17 # |
|
18 # The above copyright notice and this permission notice shall be |
|
19 # included in all copies or substantial portions of the Software. |
|
20 # |
|
21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|
22 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
|
23 # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|
24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
|
25 # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
|
26 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
|
27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
|
28 # OTHER DEALINGS IN THE SOFTWARE. |
|
29 # |
|
30 """Read from and write to tar format archives. |
|
31 """ |
|
32 |
|
33 __version__ = "$Revision: 60730 $" |
|
34 # $Source$ |
|
35 |
|
36 version = "0.8.0" |
|
37 __author__ = "Lars Gustäbel (lars@gustaebel.de)" |
|
38 __date__ = "$Date: 2008-02-11 19:36:07 +0100 (Mo, 11 Feb 2008) $" |
|
39 __cvsid__ = "$Id: tarfile.py 60730 2008-02-11 18:36:07Z lars.gustaebel $" |
|
40 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend." |
|
41 |
|
42 #--------- |
|
43 # Imports |
|
44 #--------- |
|
45 import sys |
|
46 import os |
|
47 import shutil |
|
48 import stat |
|
49 import errno |
|
50 import time |
|
51 import struct |
|
52 import copy |
|
53 |
|
54 if sys.platform == 'mac': |
|
55 # This module needs work for MacOS9, especially in the area of pathname |
|
56 # handling. In many places it is assumed a simple substitution of / by the |
|
57 # local os.path.sep is good enough to convert pathnames, but this does not |
|
58 # work with the mac rooted:path:name versus :nonrooted:path:name syntax |
|
59 raise ImportError, "tarfile does not work for platform==mac" |
|
60 |
|
61 try: |
|
62 import grp, pwd |
|
63 except ImportError: |
|
64 grp = pwd = None |
|
65 |
|
# Names exported by "from tarfile import *".
__all__ = [
    "TarFile",
    "TarInfo",
    "is_tarfile",
    "TarError",
]
|
68 |
|
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = "\0"                          # the null character
BLOCKSIZE = 512                     # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20         # length of records
MAGIC = "ustar"                     # magic tar string
VERSION = "00"                      # version number

LENGTH_NAME = 100                   # maximum length of a filename
LENGTH_LINK = 100                   # maximum length of a linkname
LENGTH_PREFIX = 155                 # maximum length of the prefix field
MAXSIZE_MEMBER = 8 ** 11 - 1        # maximum size of a file (11 octal digits)

REGTYPE = "0"                       # regular file
AREGTYPE = "\0"                     # regular file
LNKTYPE = "1"                       # link (inside tarfile)
SYMTYPE = "2"                       # symbolic link
CHRTYPE = "3"                       # character special device
BLKTYPE = "4"                       # block special device
DIRTYPE = "5"                       # directory
FIFOTYPE = "6"                      # fifo special device
CONTTYPE = "7"                      # contiguous file

GNUTYPE_LONGNAME = "L"              # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"              # GNU tar extension for longlink
GNUTYPE_SPARSE = "S"                # GNU tar extension for sparse file

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile can cope with.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# File types that somehow represent regular files.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)

#---------------------------------------------------------
# Bits used in the mode field.  The literals are written in
# hex; the octal value is given in each comment.
#---------------------------------------------------------
S_IFLNK = 0xA000                    # 0120000  symbolic link
S_IFREG = 0x8000                    # 0100000  regular file
S_IFBLK = 0x6000                    # 0060000  block device
S_IFDIR = 0x4000                    # 0040000  directory
S_IFCHR = 0x2000                    # 0020000  character device
S_IFIFO = 0x1000                    # 0010000  fifo

TSUID = 0x800                       # 04000  set UID on execution
TSGID = 0x400                       # 02000  set GID on execution
TSVTX = 0x200                       # 01000  reserved

TUREAD = 0x100                      # 0400  read by owner
TUWRITE = 0x080                     # 0200  write by owner
TUEXEC = 0x040                      # 0100  execute/search by owner
TGREAD = 0x020                      # 0040  read by group
TGWRITE = 0x010                     # 0020  write by group
TGEXEC = 0x008                      # 0010  execute/search by group
TOREAD = 0x004                      # 0004  read by other
TOWRITE = 0x002                     # 0002  write by other
TOEXEC = 0x001                      # 0001  execute/search by other
|
132 |
|
133 #--------------------------------------------------------- |
|
134 # Some useful functions |
|
135 #--------------------------------------------------------- |
|
136 |
|
def stn(s, length):
    """Fit the python string s into a header field of length characters.

    Input longer than the field is cut off after length characters;
    shorter input is padded on the right with NUL characters.
    """
    field = s[:length]
    return field.ljust(length, NUL)
|
141 |
|
def nts(s):
    """Return the part of the field s that precedes its first NUL char.

    A field that contains no NUL character is returned unchanged.
    """
    return s.split("\0", 1)[0]
|
150 |
|
def nti(s):
    """Convert a number field to a python number.

    s is the raw content of a numeric header field.  Returns the decoded
    integer; an empty or blank field decodes to 0.  Raises ValueError if
    the field is neither base-256 encoded nor a valid octal number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.  s[:1] (instead of s[0]) keeps an empty field from
    # raising IndexError.
    if s[:1] != chr(0x80):
        # Octal digits, terminated by NUL and/or padded with spaces.
        # Stripping first lets a field consisting only of spaces decode
        # as 0 instead of failing inside int().
        n = int(nts(s).strip() or "0", 8)
    else:
        # Marker byte 0x80 followed by a big-endian base-256 number.
        n = 0
        for char in s[1:]:
            n = (n << 8) + ord(char)
    return n
|
164 |
|
def itn(n, digits=8, posix=False):
    """Convert a python number to a number field of digits characters.

    Raises ValueError("overflow in number field") when posix is true and
    n cannot be stored as digits-1 octal digits.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0x80 (octal 0200) byte indicates this
    # particular encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    octal_limit = 8 ** (digits - 1)
    if 0 <= n < octal_limit:
        return "%0*o" % (digits - 1, n) + NUL

    if posix:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the low digits-1 bytes, least significant first, then put
    # them into big-endian order behind the marker byte.
    octets = []
    for _ in range(digits - 1):
        octets.append(chr(n & 0xFF))
        n >>= 8
    octets.reverse()
    return chr(0x80) + "".join(octets)
|
191 |
|
def calc_chksums(buf):
    """Return the (unsigned, signed) checksums of a member's header.

    All characters of the first 512 bytes of buf are summed up, except
    for the chksum field (bytes 148-155) which is treated as if it was
    filled with spaces. According to the GNU tar sources, some tars
    (Sun and NeXT) calculate chksum with signed char, which will be
    different if there are chars in the buffer with the high bit set.
    So both variants are calculated.
    """
    header = buf[:512]
    # "8x" skips the chksum field itself; the constant 256 accounts for
    # the eight spaces (8 * 32) it is assumed to contain.
    unsigned_chksum = 256 + sum(struct.unpack("148B8x356B", header))
    signed_chksum = 256 + sum(struct.unpack("148b8x356b", header))
    return unsigned_chksum, signed_chksum
|
204 |
|
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    If length is None, the entire content is copied; if it is 0,
    nothing is copied.  Raises IOError("end of file reached") when src
    delivers fewer bytes than requested.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    def _transfer(nbytes):
        # Move exactly nbytes from src to dst or fail.
        chunk = src.read(nbytes)
        if len(chunk) < nbytes:
            raise IOError("end of file reached")
        dst.write(chunk)

    BUFSIZE = 16 * 1024
    full_chunks, tail = divmod(length, BUFSIZE)

    copied = 0
    while copied < full_chunks:
        _transfer(BUFSIZE)
        copied += 1

    if tail != 0:
        _transfer(tail)
    return
|
229 |
|
# Lookup table for filemode(): one inner tuple per character of the
# resulting string.  Each inner tuple lists (bits, character) pairs in
# priority order; the first pair whose bits are all set in the mode
# provides the character.
filemode_table = (
    # file type
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    # owner: read, write, execute (combined with set-UID)
    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC | TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    # group: read, write, execute (combined with set-GID)
    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC | TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    # other: read, write, execute (combined with TSVTX)
    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC | TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x")),
)
|
256 |
|
def filemode(mode):
    """Convert a file's mode to a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    def _symbol(candidates):
        # The first entry whose bits are all set in mode wins; "-" is
        # the placeholder when none of them matches.
        for bits, symbol in candidates:
            if mode & bits == bits:
                return symbol
        return "-"

    return "".join([_symbol(group) for group in filemode_table])
|
271 |
|
if os.sep == "/":
    # The native separator already is the one used inside archives.
    normpath = os.path.normpath
else:
    def normpath(path):
        """Normalize path and convert the native separator to "/"."""
        return os.path.normpath(path).replace(os.sep, "/")
|
276 |
|
class TarError(Exception):
    """Root of the exception hierarchy of this module."""


class ExtractError(TarError):
    """Raised for general errors during extraction."""


class ReadError(TarError):
    """Raised for unreadable tar archives."""


class CompressionError(TarError):
    """Raised when a compression method is not available."""


class StreamError(TarError):
    """Raised for operations that stream-like TarFiles do not support."""
|
292 |
|
293 #--------------------------- |
|
294 # internal stream interface |
|
295 #--------------------------- |
|
296 class _LowLevelFile: |
|
297 """Low-level file object. Supports reading and writing. |
|
298 It is used instead of a regular file object for streaming |
|
299 access. |
|
300 """ |
|
301 |
|
302 def __init__(self, name, mode): |
|
303 mode = { |
|
304 "r": os.O_RDONLY, |
|
305 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, |
|
306 }[mode] |
|
307 if hasattr(os, "O_BINARY"): |
|
308 mode |= os.O_BINARY |
|
309 self.fd = os.open(name, mode) |
|
310 |
|
311 def close(self): |
|
312 os.close(self.fd) |
|
313 |
|
314 def read(self, size): |
|
315 return os.read(self.fd, size) |
|
316 |
|
317 def write(self, s): |
|
318 os.write(self.fd, s) |
|
319 |
|
320 class _Stream: |
|
321 """Class that serves as an adapter between TarFile and |
|
322 a stream-like object. The stream-like object only |
|
323 needs to have a read() or write() method and is accessed |
|
324 blockwise. Use of gzip or bzip2 compression is possible. |
|
325 A stream-like object could be for example: sys.stdin, |
|
326 sys.stdout, a socket, a tape device etc. |
|
327 |
|
328 _Stream is intended to be used only internally. |
|
329 """ |
|
330 |
|
331 def __init__(self, name, mode, comptype, fileobj, bufsize): |
|
332 """Construct a _Stream object. |
|
333 """ |
|
334 self._extfileobj = True |
|
335 if fileobj is None: |
|
336 fileobj = _LowLevelFile(name, mode) |
|
337 self._extfileobj = False |
|
338 |
|
339 if comptype == '*': |
|
340 # Enable transparent compression detection for the |
|
341 # stream interface |
|
342 fileobj = _StreamProxy(fileobj) |
|
343 comptype = fileobj.getcomptype() |
|
344 |
|
345 self.name = name or "" |
|
346 self.mode = mode |
|
347 self.comptype = comptype |
|
348 self.fileobj = fileobj |
|
349 self.bufsize = bufsize |
|
350 self.buf = "" |
|
351 self.pos = 0L |
|
352 self.closed = False |
|
353 |
|
354 if comptype == "gz": |
|
355 try: |
|
356 import zlib |
|
357 except ImportError: |
|
358 raise CompressionError("zlib module is not available") |
|
359 self.zlib = zlib |
|
360 self.crc = zlib.crc32("") |
|
361 if mode == "r": |
|
362 self._init_read_gz() |
|
363 else: |
|
364 self._init_write_gz() |
|
365 |
|
366 if comptype == "bz2": |
|
367 try: |
|
368 import bz2 |
|
369 except ImportError: |
|
370 raise CompressionError("bz2 module is not available") |
|
371 if mode == "r": |
|
372 self.dbuf = "" |
|
373 self.cmp = bz2.BZ2Decompressor() |
|
374 else: |
|
375 self.cmp = bz2.BZ2Compressor() |
|
376 |
|
377 def __del__(self): |
|
378 if hasattr(self, "closed") and not self.closed: |
|
379 self.close() |
|
380 |
|
381 def _init_write_gz(self): |
|
382 """Initialize for writing with gzip compression. |
|
383 """ |
|
384 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, |
|
385 -self.zlib.MAX_WBITS, |
|
386 self.zlib.DEF_MEM_LEVEL, |
|
387 0) |
|
388 timestamp = struct.pack("<L", long(time.time())) |
|
389 self.__write("\037\213\010\010%s\002\377" % timestamp) |
|
390 if self.name.endswith(".gz"): |
|
391 self.name = self.name[:-3] |
|
392 self.__write(self.name + NUL) |
|
393 |
|
394 def write(self, s): |
|
395 """Write string s to the stream. |
|
396 """ |
|
397 if self.comptype == "gz": |
|
398 self.crc = self.zlib.crc32(s, self.crc) |
|
399 self.pos += len(s) |
|
400 if self.comptype != "tar": |
|
401 s = self.cmp.compress(s) |
|
402 self.__write(s) |
|
403 |
|
404 def __write(self, s): |
|
405 """Write string s to the stream if a whole new block |
|
406 is ready to be written. |
|
407 """ |
|
408 self.buf += s |
|
409 while len(self.buf) > self.bufsize: |
|
410 self.fileobj.write(self.buf[:self.bufsize]) |
|
411 self.buf = self.buf[self.bufsize:] |
|
412 |
|
413 def close(self): |
|
414 """Close the _Stream object. No operation should be |
|
415 done on it afterwards. |
|
416 """ |
|
417 if self.closed: |
|
418 return |
|
419 |
|
420 if self.mode == "w" and self.comptype != "tar": |
|
421 self.buf += self.cmp.flush() |
|
422 |
|
423 if self.mode == "w" and self.buf: |
|
424 self.fileobj.write(self.buf) |
|
425 self.buf = "" |
|
426 if self.comptype == "gz": |
|
427 # The native zlib crc is an unsigned 32-bit integer, but |
|
428 # the Python wrapper implicitly casts that to a signed C |
|
429 # long. So, on a 32-bit box self.crc may "look negative", |
|
430 # while the same crc on a 64-bit box may "look positive". |
|
431 # To avoid irksome warnings from the `struct` module, force |
|
432 # it to look positive on all boxes. |
|
433 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL)) |
|
434 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL)) |
|
435 |
|
436 if not self._extfileobj: |
|
437 self.fileobj.close() |
|
438 |
|
439 self.closed = True |
|
440 |
|
441 def _init_read_gz(self): |
|
442 """Initialize for reading a gzip compressed fileobj. |
|
443 """ |
|
444 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS) |
|
445 self.dbuf = "" |
|
446 |
|
447 # taken from gzip.GzipFile with some alterations |
|
448 if self.__read(2) != "\037\213": |
|
449 raise ReadError("not a gzip file") |
|
450 if self.__read(1) != "\010": |
|
451 raise CompressionError("unsupported compression method") |
|
452 |
|
453 flag = ord(self.__read(1)) |
|
454 self.__read(6) |
|
455 |
|
456 if flag & 4: |
|
457 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1)) |
|
458 self.read(xlen) |
|
459 if flag & 8: |
|
460 while True: |
|
461 s = self.__read(1) |
|
462 if not s or s == NUL: |
|
463 break |
|
464 if flag & 16: |
|
465 while True: |
|
466 s = self.__read(1) |
|
467 if not s or s == NUL: |
|
468 break |
|
469 if flag & 2: |
|
470 self.__read(2) |
|
471 |
|
472 def tell(self): |
|
473 """Return the stream's file pointer position. |
|
474 """ |
|
475 return self.pos |
|
476 |
|
477 def seek(self, pos=0): |
|
478 """Set the stream's file pointer to pos. Negative seeking |
|
479 is forbidden. |
|
480 """ |
|
481 if pos - self.pos >= 0: |
|
482 blocks, remainder = divmod(pos - self.pos, self.bufsize) |
|
483 for i in xrange(blocks): |
|
484 self.read(self.bufsize) |
|
485 self.read(remainder) |
|
486 else: |
|
487 raise StreamError("seeking backwards is not allowed") |
|
488 return self.pos |
|
489 |
|
490 def read(self, size=None): |
|
491 """Return the next size number of bytes from the stream. |
|
492 If size is not defined, return all bytes of the stream |
|
493 up to EOF. |
|
494 """ |
|
495 if size is None: |
|
496 t = [] |
|
497 while True: |
|
498 buf = self._read(self.bufsize) |
|
499 if not buf: |
|
500 break |
|
501 t.append(buf) |
|
502 buf = "".join(t) |
|
503 else: |
|
504 buf = self._read(size) |
|
505 self.pos += len(buf) |
|
506 return buf |
|
507 |
|
508 def _read(self, size): |
|
509 """Return size bytes from the stream. |
|
510 """ |
|
511 if self.comptype == "tar": |
|
512 return self.__read(size) |
|
513 |
|
514 c = len(self.dbuf) |
|
515 t = [self.dbuf] |
|
516 while c < size: |
|
517 buf = self.__read(self.bufsize) |
|
518 if not buf: |
|
519 break |
|
520 buf = self.cmp.decompress(buf) |
|
521 t.append(buf) |
|
522 c += len(buf) |
|
523 t = "".join(t) |
|
524 self.dbuf = t[size:] |
|
525 return t[:size] |
|
526 |
|
527 def __read(self, size): |
|
528 """Return size bytes from stream. If internal buffer is empty, |
|
529 read another block from the stream. |
|
530 """ |
|
531 c = len(self.buf) |
|
532 t = [self.buf] |
|
533 while c < size: |
|
534 buf = self.fileobj.read(self.bufsize) |
|
535 if not buf: |
|
536 break |
|
537 t.append(buf) |
|
538 c += len(buf) |
|
539 t = "".join(t) |
|
540 self.buf = t[size:] |
|
541 return t[:size] |
|
542 # class _Stream |
|
543 |
|
544 class _StreamProxy(object): |
|
545 """Small proxy class that enables transparent compression |
|
546 detection for the Stream interface (mode 'r|*'). |
|
547 """ |
|
548 |
|
549 def __init__(self, fileobj): |
|
550 self.fileobj = fileobj |
|
551 self.buf = self.fileobj.read(BLOCKSIZE) |
|
552 |
|
553 def read(self, size): |
|
554 self.read = self.fileobj.read |
|
555 return self.buf |
|
556 |
|
557 def getcomptype(self): |
|
558 if self.buf.startswith("\037\213\010"): |
|
559 return "gz" |
|
560 if self.buf.startswith("BZh91"): |
|
561 return "bz2" |
|
562 return "tar" |
|
563 |
|
564 def close(self): |
|
565 self.fileobj.close() |
|
566 # class StreamProxy |
|
567 |
|
568 class _BZ2Proxy(object): |
|
569 """Small proxy class that enables external file object |
|
570 support for "r:bz2" and "w:bz2" modes. This is actually |
|
571 a workaround for a limitation in bz2 module's BZ2File |
|
572 class which (unlike gzip.GzipFile) has no support for |
|
573 a file object argument. |
|
574 """ |
|
575 |
|
576 blocksize = 16 * 1024 |
|
577 |
|
578 def __init__(self, fileobj, mode): |
|
579 self.fileobj = fileobj |
|
580 self.mode = mode |
|
581 self.init() |
|
582 |
|
583 def init(self): |
|
584 import bz2 |
|
585 self.pos = 0 |
|
586 if self.mode == "r": |
|
587 self.bz2obj = bz2.BZ2Decompressor() |
|
588 self.fileobj.seek(0) |
|
589 self.buf = "" |
|
590 else: |
|
591 self.bz2obj = bz2.BZ2Compressor() |
|
592 |
|
593 def read(self, size): |
|
594 b = [self.buf] |
|
595 x = len(self.buf) |
|
596 while x < size: |
|
597 try: |
|
598 raw = self.fileobj.read(self.blocksize) |
|
599 data = self.bz2obj.decompress(raw) |
|
600 b.append(data) |
|
601 except EOFError: |
|
602 break |
|
603 x += len(data) |
|
604 self.buf = "".join(b) |
|
605 |
|
606 buf = self.buf[:size] |
|
607 self.buf = self.buf[size:] |
|
608 self.pos += len(buf) |
|
609 return buf |
|
610 |
|
611 def seek(self, pos): |
|
612 if pos < self.pos: |
|
613 self.init() |
|
614 self.read(pos - self.pos) |
|
615 |
|
616 def tell(self): |
|
617 return self.pos |
|
618 |
|
619 def write(self, data): |
|
620 self.pos += len(data) |
|
621 raw = self.bz2obj.compress(data) |
|
622 self.fileobj.write(raw) |
|
623 |
|
624 def close(self): |
|
625 if self.mode == "w": |
|
626 raw = self.bz2obj.flush() |
|
627 self.fileobj.write(raw) |
|
628 self.fileobj.close() |
|
629 # class _BZ2Proxy |
|
630 |
|
631 #------------------------ |
|
632 # Extraction file object |
|
633 #------------------------ |
|
634 class _FileInFile(object): |
|
635 """A thin wrapper around an existing file object that |
|
636 provides a part of its data as an individual file |
|
637 object. |
|
638 """ |
|
639 |
|
640 def __init__(self, fileobj, offset, size, sparse=None): |
|
641 self.fileobj = fileobj |
|
642 self.offset = offset |
|
643 self.size = size |
|
644 self.sparse = sparse |
|
645 self.position = 0 |
|
646 |
|
647 def tell(self): |
|
648 """Return the current file position. |
|
649 """ |
|
650 return self.position |
|
651 |
|
652 def seek(self, position): |
|
653 """Seek to a position in the file. |
|
654 """ |
|
655 self.position = position |
|
656 |
|
657 def read(self, size=None): |
|
658 """Read data from the file. |
|
659 """ |
|
660 if size is None: |
|
661 size = self.size - self.position |
|
662 else: |
|
663 size = min(size, self.size - self.position) |
|
664 |
|
665 if self.sparse is None: |
|
666 return self.readnormal(size) |
|
667 else: |
|
668 return self.readsparse(size) |
|
669 |
|
670 def readnormal(self, size): |
|
671 """Read operation for regular files. |
|
672 """ |
|
673 self.fileobj.seek(self.offset + self.position) |
|
674 self.position += size |
|
675 return self.fileobj.read(size) |
|
676 |
|
677 def readsparse(self, size): |
|
678 """Read operation for sparse files. |
|
679 """ |
|
680 data = [] |
|
681 while size > 0: |
|
682 buf = self.readsparsesection(size) |
|
683 if not buf: |
|
684 break |
|
685 size -= len(buf) |
|
686 data.append(buf) |
|
687 return "".join(data) |
|
688 |
|
689 def readsparsesection(self, size): |
|
690 """Read a single section of a sparse file. |
|
691 """ |
|
692 section = self.sparse.find(self.position) |
|
693 |
|
694 if section is None: |
|
695 return "" |
|
696 |
|
697 size = min(size, section.offset + section.size - self.position) |
|
698 |
|
699 if isinstance(section, _data): |
|
700 realpos = section.realpos + self.position - section.offset |
|
701 self.fileobj.seek(self.offset + realpos) |
|
702 self.position += size |
|
703 return self.fileobj.read(size) |
|
704 else: |
|
705 self.position += size |
|
706 return NUL * size |
|
707 #class _FileInFile |
|
708 |
|
709 |
|
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    blocksize = 1024            # amount read at once by readline()

    def __init__(self, tarfile, tarinfo):
        """Wrap the data of the member tarinfo inside tarfile.fileobj."""
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        self.buffer = ""        # data read ahead by readline()

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present, None or negative, read all data until EOF is
           reached.  Raises ValueError if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Treat a negative size like None, as file objects do.  Used
        # as a slice index it would cut data off the end of the line
        # buffer instead.
        if size is not None and size < 0:
            size = None

        buf = ""
        if self.buffer:
            # hand out what readline() has read ahead first
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.  Raises ValueError
           if the file is closed.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        # Only a non-negative size limits the line; None and negative
        # values mean "no limit".
        if size is not None and size >= 0:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line:
                break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.

           The resulting position is kept within 0 and the file size.
           Raises ValueError for an unknown whence or a closed file.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # read-ahead data belongs to the old position
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
|
836 #class ExFileObject |
|
837 |
|
838 #------------------ |
|
839 # Exported Classes |
|
840 #------------------ |
|
841 class TarInfo(object): |
|
842 """Informational class which holds the details about an |
|
843 archive member given by a tar header block. |
|
844 TarInfo objects are returned by TarFile.getmember(), |
|
845 TarFile.getmembers() and TarFile.gettarinfo() and are |
|
846 usually created internally. |
|
847 """ |
|
848 |
|
849 def __init__(self, name=""): |
|
850 """Construct a TarInfo object. name is the optional name |
|
851 of the member. |
|
852 """ |
|
853 self.name = name # member name (dirnames must end with '/') |
|
854 self.mode = 0666 # file permissions |
|
855 self.uid = 0 # user id |
|
856 self.gid = 0 # group id |
|
857 self.size = 0 # file size |
|
858 self.mtime = 0 # modification time |
|
859 self.chksum = 0 # header checksum |
|
860 self.type = REGTYPE # member type |
|
861 self.linkname = "" # link name |
|
862 self.uname = "user" # user name |
|
863 self.gname = "group" # group name |
|
864 self.devmajor = 0 # device major number |
|
865 self.devminor = 0 # device minor number |
|
866 |
|
867 self.offset = 0 # the tar header starts here |
|
868 self.offset_data = 0 # the file's data starts here |
|
869 |
|
870 def __repr__(self): |
|
871 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) |
|
872 |
|
873 @classmethod |
|
874 def frombuf(cls, buf): |
|
875 """Construct a TarInfo object from a 512 byte string buffer. |
|
876 """ |
|
877 if len(buf) != BLOCKSIZE: |
|
878 raise ValueError("truncated header") |
|
879 if buf.count(NUL) == BLOCKSIZE: |
|
880 raise ValueError("empty header") |
|
881 |
|
882 tarinfo = cls() |
|
883 tarinfo.buf = buf |
|
884 tarinfo.name = nts(buf[0:100]) |
|
885 tarinfo.mode = nti(buf[100:108]) |
|
886 tarinfo.uid = nti(buf[108:116]) |
|
887 tarinfo.gid = nti(buf[116:124]) |
|
888 tarinfo.size = nti(buf[124:136]) |
|
889 tarinfo.mtime = nti(buf[136:148]) |
|
890 tarinfo.chksum = nti(buf[148:156]) |
|
891 tarinfo.type = buf[156:157] |
|
892 tarinfo.linkname = nts(buf[157:257]) |
|
893 tarinfo.uname = nts(buf[265:297]) |
|
894 tarinfo.gname = nts(buf[297:329]) |
|
895 tarinfo.devmajor = nti(buf[329:337]) |
|
896 tarinfo.devminor = nti(buf[337:345]) |
|
897 prefix = nts(buf[345:500]) |
|
898 |
|
899 if prefix and not tarinfo.issparse(): |
|
900 tarinfo.name = prefix + "/" + tarinfo.name |
|
901 |
|
902 if tarinfo.chksum not in calc_chksums(buf): |
|
903 raise ValueError("invalid header") |
|
904 return tarinfo |
|
905 |
|
906 def tobuf(self, posix=False): |
|
907 """Return a tar header as a string of 512 byte blocks. |
|
908 """ |
|
909 buf = "" |
|
910 type = self.type |
|
911 prefix = "" |
|
912 |
|
913 if self.name.endswith("/"): |
|
914 type = DIRTYPE |
|
915 |
|
916 if type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK): |
|
917 # Prevent "././@LongLink" from being normalized. |
|
918 name = self.name |
|
919 else: |
|
920 name = normpath(self.name) |
|
921 |
|
922 if type == DIRTYPE: |
|
923 # directories should end with '/' |
|
924 name += "/" |
|
925 |
|
926 linkname = self.linkname |
|
927 if linkname: |
|
928 # if linkname is empty we end up with a '.' |
|
929 linkname = normpath(linkname) |
|
930 |
|
931 if posix: |
|
932 if self.size > MAXSIZE_MEMBER: |
|
933 raise ValueError("file is too large (>= 8 GB)") |
|
934 |
|
935 if len(self.linkname) > LENGTH_LINK: |
|
936 raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK)) |
|
937 |
|
938 if len(name) > LENGTH_NAME: |
|
939 prefix = name[:LENGTH_PREFIX + 1] |
|
940 while prefix and prefix[-1] != "/": |
|
941 prefix = prefix[:-1] |
|
942 |
|
943 name = name[len(prefix):] |
|
944 prefix = prefix[:-1] |
|
945 |
|
946 if not prefix or len(name) > LENGTH_NAME: |
|
947 raise ValueError("name is too long") |
|
948 |
|
949 else: |
|
950 if len(self.linkname) > LENGTH_LINK: |
|
951 buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK) |
|
952 |
|
953 if len(name) > LENGTH_NAME: |
|
954 buf += self._create_gnulong(name, GNUTYPE_LONGNAME) |
|
955 |
|
956 parts = [ |
|
957 stn(name, 100), |
|
958 itn(self.mode & 07777, 8, posix), |
|
959 itn(self.uid, 8, posix), |
|
960 itn(self.gid, 8, posix), |
|
961 itn(self.size, 12, posix), |
|
962 itn(self.mtime, 12, posix), |
|
963 " ", # checksum field |
|
964 type, |
|
965 stn(self.linkname, 100), |
|
966 stn(MAGIC, 6), |
|
967 stn(VERSION, 2), |
|
968 stn(self.uname, 32), |
|
969 stn(self.gname, 32), |
|
970 itn(self.devmajor, 8, posix), |
|
971 itn(self.devminor, 8, posix), |
|
972 stn(prefix, 155) |
|
973 ] |
|
974 |
|
975 buf += "".join(parts).ljust(BLOCKSIZE, NUL) |
|
976 chksum = calc_chksums(buf[-BLOCKSIZE:])[0] |
|
977 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:] |
|
978 self.buf = buf |
|
979 return buf |
|
980 |
|
def _create_gnulong(self, name, type):
    """Build a GNU longname/longlink pseudo-member for `name'.

    The result is the buffer returned by tobuf() for a member called
    "././@LongLink" of the given `type' (its size is the length of the
    NUL-terminated name), followed by the NUL-terminated name itself,
    padded with NULs up to a multiple of BLOCKSIZE.
    """
    payload = name + NUL

    # The pseudo-member announcing the long name.
    header = self.__class__()
    header.name = "././@LongLink"
    header.type = type
    header.mode = 0
    header.size = len(payload)

    pieces = [header.tobuf(), payload]

    # Pad the name data to a full block.
    leftover = len(payload) % BLOCKSIZE
    if leftover > 0:
        pieces.append((BLOCKSIZE - leftover) * NUL)
    return "".join(pieces)
|
1003 |
|
def isreg(self):
    """Return True if the member's type is one of REGULAR_TYPES."""
    return self.type in REGULAR_TYPES
|
def isfile(self):
    """Alias for isreg()."""
    return self.isreg()
|
def isdir(self):
    """Return True if the member's type is DIRTYPE."""
    return self.type == DIRTYPE
|
def issym(self):
    """Return True if the member's type is SYMTYPE."""
    return self.type == SYMTYPE
|
def islnk(self):
    """Return True if the member's type is LNKTYPE."""
    return self.type == LNKTYPE
|
def ischr(self):
    """Return True if the member's type is CHRTYPE."""
    return self.type == CHRTYPE
|
def isblk(self):
    """Return True if the member's type is BLKTYPE."""
    return self.type == BLKTYPE
|
def isfifo(self):
    """Return True if the member's type is FIFOTYPE."""
    return self.type == FIFOTYPE
|
def issparse(self):
    """Return True if the member's type is GNUTYPE_SPARSE."""
    return self.type == GNUTYPE_SPARSE
|
def isdev(self):
    """Return True if the member's type is CHRTYPE, BLKTYPE or FIFOTYPE."""
    return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
|
1024 # class TarInfo |
|
1025 |
|
1026 class TarFile(object): |
|
1027 """The TarFile Class provides an interface to tar archives. |
|
1028 """ |
|
1029 |
|
debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

dereference = False         # If true, add content of linked file to the
                            # tar file, else the link.

ignore_zeros = False        # If true, next() skips empty or invalid
                            # blocks and continues processing instead of
                            # stopping at the first one.

errorlevel = 0              # If 0, errors during extraction only appear
                            # in debug messages. If > 0, extract()
                            # re-raises EnvironmentError; if > 1, it
                            # re-raises ExtractError as well.

posix = False               # If True, generates POSIX.1-1990-compliant
                            # archives (no GNU extensions!). Passed to
                            # TarInfo.tobuf() by addfile().

fileobject = ExFileObject   # Class that extractfile() instantiates to
                            # give access to a member's data.
|
1046 |
|
def __init__(self, name=None, mode="r", fileobj=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       Raises ValueError if `mode' is not exactly 'r', 'a' or 'w'. If
       reading the archive fails while the object is being set up, a
       file that was opened here (i.e. no `fileobj' was passed) is closed
       again before the exception propagates.
    """
    # Compare against a tuple: a substring test (mode in "raw") lets the
    # empty string through, which then fails below with a KeyError.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self._mode = mode
    self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]

    if not fileobj:
        fileobj = file(name, self.mode)
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self.mode = fileobj.mode
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init datastructures
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    initialized = False
    try:
        # current position in the archive file
        self.offset = self.fileobj.tell()

        if self._mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self._mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            self.firstmember = None
            while True:
                try:
                    tarinfo = self.next()
                except ReadError:
                    self.fileobj.seek(0)
                    break
                if tarinfo is None:
                    # self.offset is where the next header belongs. An
                    # absolute seek is right both when next() stopped at
                    # a zero block and when it hit the physical end of
                    # the file; a relative seek of -BLOCKSIZE would land
                    # inside the last member's data in the latter case.
                    self.fileobj.seek(self.offset)
                    break

        if self._mode in ("a", "w"):
            self._loaded = True
        initialized = True
    finally:
        if not initialized:
            # Do not leak a file handle that we opened ourselves.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
|
1102 |
|
1103 #-------------------------------------------------------------------------- |
|
1104 # Below are the classmethods which act as alternate constructors to the |
|
1105 # TarFile class. The open() method is the only one that is needed for |
|
1106 # public use; it is the "super"-constructor and is able to select an |
|
1107 # adequate "sub"-constructor for a particular compression using the mapping |
|
1108 # from OPEN_METH. |
|
1109 # |
|
1110 # This concept allows one to subclass TarFile without losing the comfort of |
|
1111 # the super-constructor. A sub-constructor is registered and made available |
|
1112 # by adding it to the mapping in OPEN_METH. |
|
1113 |
|
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered opener accepts
       the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                # Remember where we are so that a failed attempt can be
                # undone before the next opener is tried.
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj)
            except (ReadError, CompressionError):
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Tuple membership, not a substring test, so that e.g. "rw" is
        # rejected here with the matching message.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize))
        t._extfileobj = False
        return t

    # Tuple membership: with a substring test the empty string (and
    # "aw") would be accepted as a valid mode.
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj)

    raise ValueError("undiscernible mode")
|
1185 |
|
@classmethod
def taropen(cls, name, mode="r", fileobj=None):
    """Open uncompressed tar archive name for reading or writing.

       Raises ValueError unless `mode' is exactly 'r', 'a' or 'w'.
    """
    # Tuple membership: a substring test against "raw" would accept the
    # empty string as a mode.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj)
|
1193 |
|
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError unless `mode' is exactly 'r' or 'w',
       CompressionError if the gzip module is unavailable and ReadError
       if the data cannot be read as a gzip stream.
    """
    # Tuple membership: a substring test against "rw" would accept the
    # empty string as a mode.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # If no file object was passed in, let GzipFile open `name' itself.
    # It then owns that file and closes it in its own close(); a file
    # object handed to GzipFile is never closed by it, so opening the
    # file here first would leave it open for good.
    gzfile = gzip.GzipFile(name, mode, compresslevel, fileobj)

    try:
        t = cls.taropen(name, mode, gzfile)
    except IOError:
        gzfile.close()
        raise ReadError("not a gzip file")
    except:
        # Cleanup only; the exception is passed on unchanged.
        gzfile.close()
        raise
    t._extfileobj = False
    return t
|
1218 |
|
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError unless `mode' is exactly 'r' or 'w',
       CompressionError if the bz2 module is unavailable and ReadError
       if the data cannot be read as a bzip2 stream.
    """
    # Tuple membership: a substring test against "rw" would accept the
    # empty string as a mode.
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Remember whether the compressed file is opened here, so that only
    # our own handle is closed again if opening the archive fails.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    try:
        t = cls.taropen(name, mode, fileobj)
    except IOError:
        if opened_here:
            fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        # Cleanup only; the exception is passed on unchanged.
        if opened_here:
            fileobj.close()
        raise
    t._extfileobj = False
    return t
|
1243 |
|
# All *open() methods are registered here. The keys are the compression
# names accepted after ':' in the mode string of open(); the values are
# the names of the classmethods that open() looks up with getattr().
# For mode 'r' / 'r:*' open() tries the entries one after another.
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
|
1250 |
|
1251 #-------------------------------------------------------------------------- |
|
1252 # The public methods which TarFile provides: |
|
1253 |
|
def close(self):
    """Close the TarFile. In write-mode, two finishing zero blocks are
       appended to the archive.

       Calling close() on an already closed TarFile does nothing. The
       underlying file is closed (unless it was supplied by the caller)
       even if writing the trailing blocks fails.
    """
    if self.closed:
        return

    # Mark the object closed up front so that a failing write below
    # cannot leave it half-open.
    self.closed = True
    try:
        if self._mode in ("a", "w"):
            self.fileobj.write(NUL * (BLOCKSIZE * 2))
            self.offset += (BLOCKSIZE * 2)
            # fill up the end with zero-blocks
            # (like option -b20 for tar does)
            remainder = self.offset % RECORDSIZE
            if remainder > 0:
                self.fileobj.write(NUL * (RECORDSIZE - remainder))
    finally:
        if not self._extfileobj:
            self.fileobj.close()
|
1273 |
|
def getmember(self, name):
    """Look up the member called `name' and return its TarInfo object.

       The lookup is delegated to _getmember(); when that returns None,
       KeyError is raised. If a member occurs more than once in the
       archive, its last occurrence is assumed to be the most up-to-date
       version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
|
1284 |
|
def getmembers(self):
    """Return the archive's members as a list of TarInfo objects, in the
       order in which they appear in the archive.
    """
    self._check()
    # A complete list requires that the whole archive has been scanned
    # once; _load() is called for that if it has not happened yet.
    if self._loaded:
        return self.members
    self._load()
    return self.members
|
1294 |
|
def getnames(self):
    """Return the names of all members as a list, in the same order as
       the list returned by getmembers().
    """
    members = self.getmembers()
    return [member.name for member in members]
|
1300 |
|
1301 def gettarinfo(self, name=None, arcname=None, fileobj=None): |
|
1302 """Create a TarInfo object for either the file `name' or the file |
|
1303 object `fileobj' (using os.fstat on its file descriptor). You can |
|
1304 modify some of the TarInfo's attributes before you add it using |
|
1305 addfile(). If given, `arcname' specifies an alternative name for the |
|
1306 file in the archive. |
|
1307 """ |
|
1308 self._check("aw") |
|
1309 |
|
1310 # When fileobj is given, replace name by |
|
1311 # fileobj's real name. |
|
1312 if fileobj is not None: |
|
1313 name = fileobj.name |
|
1314 |
|
1315 # Building the name of the member in the archive. |
|
1316 # Backward slashes are converted to forward slashes, |
|
1317 # Absolute paths are turned to relative paths. |
|
1318 if arcname is None: |
|
1319 arcname = name |
|
1320 arcname = normpath(arcname) |
|
1321 drv, arcname = os.path.splitdrive(arcname) |
|
1322 while arcname[0:1] == "/": |
|
1323 arcname = arcname[1:] |
|
1324 |
|
1325 # Now, fill the TarInfo object with |
|
1326 # information specific for the file. |
|
1327 tarinfo = TarInfo() |
|
1328 |
|
1329 # Use os.stat or os.lstat, depending on platform |
|
1330 # and if symlinks shall be resolved. |
|
1331 if fileobj is None: |
|
1332 if hasattr(os, "lstat") and not self.dereference: |
|
1333 statres = os.lstat(name) |
|
1334 else: |
|
1335 statres = os.stat(name) |
|
1336 else: |
|
1337 statres = os.fstat(fileobj.fileno()) |
|
1338 linkname = "" |
|
1339 |
|
1340 stmd = statres.st_mode |
|
1341 if stat.S_ISREG(stmd): |
|
1342 inode = (statres.st_ino, statres.st_dev) |
|
1343 if not self.dereference and \ |
|
1344 statres.st_nlink > 1 and inode in self.inodes: |
|
1345 # Is it a hardlink to an already |
|
1346 # archived file? |
|
1347 type = LNKTYPE |
|
1348 linkname = self.inodes[inode] |
|
1349 else: |
|
1350 # The inode is added only if its valid. |
|
1351 # For win32 it is always 0. |
|
1352 type = REGTYPE |
|
1353 if inode[0]: |
|
1354 self.inodes[inode] = arcname |
|
1355 elif stat.S_ISDIR(stmd): |
|
1356 type = DIRTYPE |
|
1357 if arcname[-1:] != "/": |
|
1358 arcname += "/" |
|
1359 elif stat.S_ISFIFO(stmd): |
|
1360 type = FIFOTYPE |
|
1361 elif stat.S_ISLNK(stmd): |
|
1362 type = SYMTYPE |
|
1363 linkname = os.readlink(name) |
|
1364 elif stat.S_ISCHR(stmd): |
|
1365 type = CHRTYPE |
|
1366 elif stat.S_ISBLK(stmd): |
|
1367 type = BLKTYPE |
|
1368 else: |
|
1369 return None |
|
1370 |
|
1371 # Fill the TarInfo object with all |
|
1372 # information we can get. |
|
1373 tarinfo.name = arcname |
|
1374 tarinfo.mode = stmd |
|
1375 tarinfo.uid = statres.st_uid |
|
1376 tarinfo.gid = statres.st_gid |
|
1377 if stat.S_ISREG(stmd): |
|
1378 tarinfo.size = statres.st_size |
|
1379 else: |
|
1380 tarinfo.size = 0L |
|
1381 tarinfo.mtime = statres.st_mtime |
|
1382 tarinfo.type = type |
|
1383 tarinfo.linkname = linkname |
|
1384 if pwd: |
|
1385 try: |
|
1386 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] |
|
1387 except KeyError: |
|
1388 pass |
|
1389 if grp: |
|
1390 try: |
|
1391 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] |
|
1392 except KeyError: |
|
1393 pass |
|
1394 |
|
1395 if type in (CHRTYPE, BLKTYPE): |
|
1396 if hasattr(os, "major") and hasattr(os, "minor"): |
|
1397 tarinfo.devmajor = os.major(statres.st_rdev) |
|
1398 tarinfo.devminor = os.minor(statres.st_rdev) |
|
1399 return tarinfo |
|
1400 |
|
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.
    """
    self._check()

    # One output line per member; the trailing commas keep the pieces of
    # a line together until the bare print at the end of the loop body.
    for tarinfo in self:
        if verbose:
            print filemode(tarinfo.mode),
            # Fall back to the numeric ids where no names are recorded.
            print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                             tarinfo.gname or tarinfo.gid),
            if tarinfo.ischr() or tarinfo.isblk():
                # Devices show "major,minor" in place of a size.
                print "%10s" % ("%d,%d" \
                                % (tarinfo.devmajor, tarinfo.devminor)),
            else:
                print "%10d" % tarinfo.size,
            print "%d-%02d-%02d %02d:%02d:%02d" \
                  % time.localtime(tarinfo.mtime)[:6],

        print tarinfo.name,

        if verbose:
            if tarinfo.issym():
                print "->", tarinfo.linkname,
            if tarinfo.islnk():
                print "link to", tarinfo.linkname,
        print
|
1429 |
|
def add(self, name, arcname=None, recursive=True):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False.

       The archive file itself and files of an unsupported type are
       skipped with a debug message.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    # Special case: The user wants to add the current
    # working directory.
    if name == ".":
        if recursive:
            if arcname == ".":
                arcname = ""
            for f in os.listdir("."):
                self.add(f, os.path.join(arcname, f))
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = file(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source file even if writing to the archive fails.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f))

    else:
        self.addfile(tarinfo)
|
1480 |
|
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by `tarinfo' to the archive.

       A copy of `tarinfo' is written and recorded in self.members. If
       `fileobj' is given, tarinfo.size bytes are read from it and added
       to the archive, padded with NULs to a multiple of BLOCKSIZE.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    member = copy.copy(tarinfo)

    header = member.tobuf(self.posix)
    self.fileobj.write(header)
    self.offset += len(header)

    # If there's data to follow, append it.
    if fileobj is not None:
        copyfileobj(fileobj, self.fileobj, member.size)
        full_blocks, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            # Pad the last, partially filled block.
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(member)
|
1506 |
|
1507 def extractall(self, path=".", members=None): |
|
1508 """Extract all members from the archive to the current working |
|
1509 directory and set owner, modification time and permissions on |
|
1510 directories afterwards. `path' specifies a different directory |
|
1511 to extract to. `members' is optional and must be a subset of the |
|
1512 list returned by getmembers(). |
|
1513 """ |
|
1514 directories = [] |
|
1515 |
|
1516 if members is None: |
|
1517 members = self |
|
1518 |
|
1519 for tarinfo in members: |
|
1520 if tarinfo.isdir(): |
|
1521 # Extract directories with a safe mode. |
|
1522 directories.append(tarinfo) |
|
1523 tarinfo = copy.copy(tarinfo) |
|
1524 tarinfo.mode = 0700 |
|
1525 self.extract(tarinfo, path) |
|
1526 |
|
1527 # Reverse sort directories. |
|
1528 directories.sort(lambda a, b: cmp(a.name, b.name)) |
|
1529 directories.reverse() |
|
1530 |
|
1531 # Set correct owner, mtime and filemode on directories. |
|
1532 for tarinfo in directories: |
|
1533 dirpath = os.path.join(path, tarinfo.name) |
|
1534 try: |
|
1535 self.chown(tarinfo, dirpath) |
|
1536 self.utime(tarinfo, dirpath) |
|
1537 self.chmod(tarinfo, dirpath) |
|
1538 except ExtractError, e: |
|
1539 if self.errorlevel > 1: |
|
1540 raise |
|
1541 else: |
|
1542 self._dbg(1, "tarfile: %s" % e) |
|
1543 |
|
1544 def extract(self, member, path=""): |
|
1545 """Extract a member from the archive to the current working directory, |
|
1546 using its full name. Its file information is extracted as accurately |
|
1547 as possible. `member' may be a filename or a TarInfo object. You can |
|
1548 specify a different directory using `path'. |
|
1549 """ |
|
1550 self._check("r") |
|
1551 |
|
1552 if isinstance(member, TarInfo): |
|
1553 tarinfo = member |
|
1554 else: |
|
1555 tarinfo = self.getmember(member) |
|
1556 |
|
1557 # Prepare the link target for makelink(). |
|
1558 if tarinfo.islnk(): |
|
1559 tarinfo._link_target = os.path.join(path, tarinfo.linkname) |
|
1560 |
|
1561 try: |
|
1562 self._extract_member(tarinfo, os.path.join(path, tarinfo.name)) |
|
1563 except EnvironmentError, e: |
|
1564 if self.errorlevel > 0: |
|
1565 raise |
|
1566 else: |
|
1567 if e.filename is None: |
|
1568 self._dbg(1, "tarfile: %s" % e.strerror) |
|
1569 else: |
|
1570 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) |
|
1571 except ExtractError, e: |
|
1572 if self.errorlevel > 1: |
|
1573 raise |
|
1574 else: |
|
1575 self._dbg(1, "tarfile: %s" % e) |
|
1576 |
|
def extractfile(self, member):
    """Return a file object for the data of `member', which may be a
       filename or a TarInfo object.

       For a regular file, or a member of a type not in SUPPORTED_TYPES,
       a self.fileobject instance is returned. For a hard or symbolic
       link the file object of the link's target is returned; this is
       not possible on a stream and raises StreamError. For all other
       members (directories, devices, ...) None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = member if isinstance(member, TarInfo) else self.getmember(member)

    # Regular files and members of unknown type are both served as
    # plain data.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    # If there's no data associated with the member (directory, chrdev,
    # blkdev, etc.), return None instead of a file object.
    if not (tarinfo.islnk() or tarinfo.issym()):
        return None

    if isinstance(self.fileobj, _Stream):
        # A small but ugly workaround for the case that someone tries
        # to extract a (sym)link as a file-object from a non-seekable
        # stream of tar blocks.
        raise StreamError("cannot extract (sym)link as file object")

    # A (sym)link's file object is its target's file object.
    target = self._getmember(tarinfo.linkname, tarinfo)
    return self.extractfile(target)
|
1615 |
|
def _extract_member(self, tarinfo, targetpath):
    """Write the member `tarinfo' to the file system as `targetpath'.

       Missing parent directories are created, the member is created by
       the make*() method that matches its type, and finally owner,
       permissions and modification time are applied (permissions and
       time are skipped for symbolic links).
    """
    # Drop one trailing slash and let os.path.normpath() turn the name
    # into a platform specific pathname.
    if targetpath[-1:] == "/":
        targetpath = targetpath[:-1]
    targetpath = os.path.normpath(targetpath)

    # Create directories that are not part of the archive with
    # default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        message = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        message = tarinfo.name
    self._dbg(1, message)

    # Pick the method that creates this kind of member.
    if tarinfo.isreg():
        make = self.makefile
    elif tarinfo.isdir():
        make = self.makedir
    elif tarinfo.isfifo():
        make = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        make = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        make = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        make = self.makeunknown
    else:
        make = self.makefile
    make(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
|
1658 |
|
1659 #-------------------------------------------------------------------------- |
|
1660 # Below are the different file methods. They are called via |
|
1661 # _extract_member() when extract() is called. They can be replaced in a |
|
1662 # subclass to implement other functionality. |
|
1663 |
|
1664 def makedir(self, tarinfo, targetpath): |
|
1665 """Make a directory called targetpath. |
|
1666 """ |
|
1667 try: |
|
1668 # Use a safe mode for the directory, the real mode is set |
|
1669 # later in _extract_member(). |
|
1670 os.mkdir(targetpath, 0700) |
|
1671 except EnvironmentError, e: |
|
1672 if e.errno != errno.EEXIST: |
|
1673 raise |
|
1674 |
|
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The member's data is obtained from extractfile() and copied to a
       newly created `targetpath'. Both file objects are closed even if
       creating the target or copying the data fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = file(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
|
1683 |
|
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to `targetpath' as if it were a
       regular file and report that in a debug message.
    """
    self.makefile(tarinfo, targetpath)
    note = ("tarfile: Unknown file type %r, "
            "extracted as regular file.") % tarinfo.type
    self._dbg(1, note)
|
1691 |
|
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called `targetpath'. Raises ExtractError where
       os.mkfifo is not available.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
|
1699 |
|
def makedev(self, tarinfo, targetpath):
    """Create the character or block device `targetpath'. Raises
       ExtractError where os.mknod or os.makedev is not available.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the member's mode with the matching device type flag.
    mode = tarinfo.mode
    if tarinfo.isblk():
        mode = mode | stat.S_IFBLK
    else:
        mode = mode | stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
|
1714 |
|
def makelink(self, tarinfo, targetpath):
    """Make a (symbolic) link called targetpath. If it cannot be created
       (platform limitation), we try to make a copy of the referenced file
       instead of a link.

       Raises IOError("link could not be created") if the fallback copy
       fails as well.
    """
    linkpath = tarinfo.linkname
    try:
        if tarinfo.issym():
            os.symlink(linkpath, targetpath)
        else:
            # See extract().
            os.link(tarinfo._link_target, targetpath)
    except AttributeError:
        # NOTE(review): presumably reached when os.symlink / os.link do
        # not exist on the platform -- confirm. A missing _link_target
        # attribute would end up here as well.
        if tarinfo.issym():
            # The target of a symbolic link is taken relative to the
            # directory of the link itself.
            linkpath = os.path.join(os.path.dirname(tarinfo.name),
                                    linkpath)
            linkpath = normpath(linkpath)

        try:
            # First choice: extract the archive member the link refers to.
            self._extract_member(self.getmember(linkpath), targetpath)
        except (EnvironmentError, KeyError), e:
            # Second choice: copy the file from the file system.
            linkpath = os.path.normpath(linkpath)
            try:
                shutil.copy2(linkpath, targetpath)
            except EnvironmentError, e:
                raise IOError("link could not be created")
|
1741 |
|
def chown(self, tarinfo, targetpath):
    """Set the owner of `targetpath' according to `tarinfo'.

       Nothing is done unless the pwd module is available and the
       effective user id is 0. Group and user are looked up by name
       first, then by the numeric id, and finally fall back to the ids
       of the current process. Raises ExtractError if changing the owner
       fails.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
|
1769 |
|
def chmod(self, tarinfo, targetpath):
    """Apply tarinfo.mode to `targetpath'. Does nothing where os.chmod
       is not available; raises ExtractError if the call fails.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
|
1778 |
|
def utime(self, tarinfo, targetpath):
    """Set access and modification time of `targetpath' to
       tarinfo.mtime. Does nothing where os.utime is not available or
       for directories on win32; raises ExtractError if the call fails.
    """
    if not hasattr(os, 'utime'):
        return
    # According to msdn.microsoft.com, it is an error (EACCES)
    # to use utime() on directories.
    skip_directory = sys.platform == "win32" and tarinfo.isdir()
    if skip_directory:
        return
    try:
        stamp = tarinfo.mtime
        os.utime(targetpath, (stamp, stamp))
    except EnvironmentError:
        raise ExtractError("could not change modification time")
|
1792 |
|
1793 #-------------------------------------------------------------------------- |
|
def next(self):
    """Return the next member of the archive as a TarInfo object, when
       TarFile is opened for reading. Return None if there is no more
       available.

       None is also returned when a block cannot be parsed as a header
       and ignore_zeros is false; if that happens at offset 0, ReadError
       is raised instead. Every member returned from the archive is
       appended to self.members.
    """
    self._check("ra")
    # A member that was read ahead (see __init__) is handed out first.
    if self.firstmember is not None:
        m = self.firstmember
        self.firstmember = None
        return m

    # Read the next block.
    self.fileobj.seek(self.offset)
    while True:
        buf = self.fileobj.read(BLOCKSIZE)
        if not buf:
            # Physical end of the file.
            return None

        try:
            tarinfo = TarInfo.frombuf(buf)

            # Set the TarInfo object's offset to the current position of the
            # TarFile and set self.offset to the position where the data blocks
            # should begin.
            tarinfo.offset = self.offset
            self.offset += BLOCKSIZE

            tarinfo = self.proc_member(tarinfo)

        except ValueError, e:
            if self.ignore_zeros:
                # Skip this block and try the following one.
                self._dbg(2, "0x%X: empty or invalid block: %s" %
                          (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            else:
                # An unparsable block at the very beginning means that
                # this is not a readable tar archive at all.
                if self.offset == 0:
                    raise ReadError("empty, unreadable or compressed "
                                    "file: %s" % e)
                return None
        break

    # Some old tar programs represent a directory as a regular
    # file with a trailing slash.
    if tarinfo.isreg() and tarinfo.name.endswith("/"):
        tarinfo.type = DIRTYPE

    # Directory names should have a '/' at the end.
    if tarinfo.isdir() and not tarinfo.name.endswith("/"):
        tarinfo.name += "/"

    self.members.append(tarinfo)
    return tarinfo
|
1847 |
|
1848 #-------------------------------------------------------------------------- |
|
1849 # The following are methods that are called depending on the type of a |
|
1850 # member. The entry point is proc_member() which is called with a TarInfo |
|
1851 # object created from the header block from the current offset. The |
|
1852 # proc_member() method can be overridden in a subclass to add custom |
|
1853 # proc_*() methods. A proc_*() method MUST implement the following |
|
1854 # operations: |
|
1855 # 1. Set tarinfo.offset_data to the position where the data blocks begin, |
|
1856 # if there is data that follows. |
|
1857 # 2. Set self.offset to the position where the next member's header will |
|
1858 # begin. |
|
1859 # 3. Return tarinfo or another valid TarInfo object. |
|
def proc_member(self, tarinfo):
    """Dispatch `tarinfo' to the proc_*() method responsible for its
       type and return that method's result.
    """
    member_type = tarinfo.type
    if member_type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self.proc_gnulong
    elif member_type == GNUTYPE_SPARSE:
        handler = self.proc_sparse
    else:
        handler = self.proc_builtin
    return handler(tarinfo)
|
1870 |
|
def proc_builtin(self, tarinfo):
    """Process a member of a builtin type, or a member of an unknown
       type, which is treated like a regular file.

       The current offset is recorded as the start of the member's data.
       For regular files and for types that are not in SUPPORTED_TYPES
       the offset is then moved past the data blocks. Returns tarinfo.
    """
    data_start = self.offset
    tarinfo.offset_data = data_start
    has_data = (tarinfo.isreg() or
                tarinfo.type not in SUPPORTED_TYPES)
    if has_data:
        # Jump over the data blocks to where the next header begins.
        self.offset = data_start + self._block(tarinfo.size)
    return tarinfo
|
1880 |
|
def proc_gnulong(self, tarinfo):
    """Process a GNU longname or longlink member.

       The blocks that follow the header hold the long name. The header
       after those blocks is read and processed with proc_member(); the
       resulting member gets the long name (or long link name) and the
       offset of the longname header, and is returned.
    """
    # Read the blocks that hold the long name.
    pieces = []
    remaining = tarinfo.size
    while remaining > 0:
        pieces.append(self.fileobj.read(BLOCKSIZE))
        self.offset += BLOCKSIZE
        remaining -= BLOCKSIZE
    longvalue = "".join(pieces)

    # The header that follows describes the actual member.
    header = TarInfo.frombuf(self.fileobj.read(BLOCKSIZE))
    header.offset = self.offset
    self.offset += BLOCKSIZE
    member = self.proc_member(header)

    # The member starts where the longname header started and takes
    # over the long name or link name.
    member.offset = tarinfo.offset
    if tarinfo.type == GNUTYPE_LONGNAME:
        member.name = nts(longvalue)
    elif tarinfo.type == GNUTYPE_LONGLINK:
        member.linkname = nts(longvalue)

    return member
|
1909 |
|
def proc_sparse(self, tarinfo):
    """Process a GNU sparse header plus extra headers.

       Every sparse struct is 24 bytes: a 12 byte offset followed by a
       12 byte size. The member's own header holds up to 4 structs
       starting at byte 386, the isextended flag at byte 482 and the
       original file size at bytes 483-494. While isextended is 1,
       another block follows that holds up to 21 structs starting at
       byte 0 and its own isextended flag at byte 504.

       The structs are turned into a _ringbuffer of _data and _hole
       sections which is stored as tarinfo.sparse. tarinfo.offset_data
       is set to the offset behind the last sparse header, self.offset
       is moved past the stored data and tarinfo.size is replaced by
       the original file size. Returns tarinfo.
    """
    sp = _ringbuffer()
    lastpos = 0         # end of the previous data section in the file
    realpos = 0         # number of data bytes described so far

    # Layout of the block that is parsed next: position of the first
    # struct, number of structs, position of the isextended flag.
    buf = tarinfo.buf
    pos, nstructs, flagpos = 386, 4, 482
    in_first_header = True

    while True:
        for unused in range(nstructs):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                # The gap before this data section is a hole.
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        isextended = ord(buf[flagpos])
        if in_first_header:
            # Only the member's own header stores the original size.
            origsize = nti(buf[483:495])
            in_first_header = False

        # If the isextended flag is given,
        # there are extra headers to process.
        if isextended != 1:
            break
        buf = self.fileobj.read(BLOCKSIZE)
        self.offset += BLOCKSIZE
        pos, nstructs, flagpos = 0, 21, 504

    if lastpos < origsize:
        # The file ends with a hole.
        sp.append(_hole(lastpos, origsize - lastpos))

    tarinfo.sparse = sp

    tarinfo.offset_data = self.offset
    # tarinfo.size still is the size from the header here; it is used
    # to skip the data blocks before it is replaced.
    self.offset += self._block(tarinfo.size)
    tarinfo.size = origsize

    return tarinfo
|
1966 |
|
1967 #-------------------------------------------------------------------------- |
|
1968 # Little helper methods: |
|
1969 |
|
def _block(self, count):
    """Return count rounded up to a multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    # Ceiling division: negating before and after the floor division
    # rounds the number of blocks up instead of down.
    return -(-count // BLOCKSIZE) * BLOCKSIZE
|
1978 |
|
def _getmember(self, name, tarinfo=None):
    """Search the member list backwards for a member called name and
       return it, or None if there is none.

       If tarinfo is given, only the members in front of it in the list
       are searched; otherwise the whole list is searched.
    """
    # Ensure that all members have been loaded.
    members = self.getmembers()

    if tarinfo is not None:
        idx = members.index(tarinfo)
    else:
        idx = len(members)

    while idx > 0:
        idx -= 1
        candidate = members[idx]
        if name == candidate.name:
            return candidate
    return None
|
1994 |
|
def _load(self):
    """Step through the archive with next() until it returns None,
       then mark the archive as loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
|
2004 |
|
def _check(self, mode=None):
    """Raise IOError if the TarFile is closed or, when mode is given,
       if the TarFile's own mode is not contained in mode.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        # Nothing else to verify.
        return
    if self._mode not in mode:
        raise IOError("bad operation for mode %r" % self._mode)
|
2013 |
|
def __iter__(self):
    """Return an iterator over the members: a plain list iterator if
       the archive is marked as loaded, a TarIter otherwise.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
|
2021 |
|
def _dbg(self, level, msg):
    """Print msg to sys.stderr unless level is above self.debug.
    """
    wanted = level <= self.debug
    if wanted:
        print >> sys.stderr, msg
|
2027 # class TarFile |
|
2028 |
|
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator for tarfile that starts at the first
           member.
        """
        self.index = 0
        self.tarfile = tarfile

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member, or raise StopIteration when there
           is none left.

           While the TarFile is not marked as loaded, members are
           fetched with its next() method; when that is exhausted the
           TarFile is marked as loaded.
        """
        archive = self.tarfile
        if archive._loaded:
            # Fix for SF #1100429: Under rare circumstances it can
            # happen that getmembers() is called during iteration,
            # which will cause TarIter to stop prematurely. Continue
            # from the member list at our own position instead.
            try:
                member = archive.members[self.index]
            except IndexError:
                raise StopIteration
        else:
            member = archive.next()
            if not member:
                archive._loaded = True
                raise StopIteration
        self.index += 1
        return member
|
2064 |
|
2065 # Helper classes for sparse file support |
|
class _section:
    """Common base of _data and _hole: a range of size bytes that
       starts at offset.
    """

    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # The start of the range is included, its end is not.
        start = self.offset
        return start <= offset < start + self.size
|
2074 |
|
class _data(_section):
    """A data section of a sparse file.

       In addition to offset and size it stores realpos, which
       proc_sparse() sets to the total size of the data sections
       that precede this one.
    """

    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
|
2081 |
|
class _hole(_section):
    """A hole section of a sparse file. It only has the offset and
       size of a _section and, unlike _data, no realpos.
    """
|
2086 |
|
class _ringbuffer(list):
    """List of sections whose find() method resumes the search at the
       item that was found last, which increases performance over
       scanning a regular list from the start.
    """

    def __init__(self):
        # Index of the item that the last successful find() returned.
        self.idx = 0

    def find(self, offset):
        """Return the first item that contains offset, or None.

           The search starts at the item found by the previous call and
           wraps around at the end of the list. None is returned when
           no item contains offset, which includes the case that the
           buffer is empty.
        """
        if not self:
            # No sections at all: same result as an unsuccessful search
            # instead of an IndexError from indexing an empty list.
            return None

        start = self.idx
        idx = start
        while True:
            item = self[idx]
            if offset in item:
                self.idx = idx
                return item
            idx += 1
            if idx == len(self):
                idx = 0
            if idx == start:
                # End of File
                return None
|
2107 |
|
2108 #--------------------------------------------- |
|
2109 # zipfile compatible TarFile class |
|
2110 #--------------------------------------------- |
|
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """Wrapper around a TarFile that offers the methods of the
       ZipFile class from the standard module zipfile.
    """

    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open file with TarFile.taropen() for TAR_PLAIN or with
           TarFile.gzopen() for TAR_GZIPPED; any other compression
           value raises ValueError.

           When opened for reading, every member gets the attributes
           filename, file_size and date_time.
        """
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)

        if mode[:1] == "r":
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist()."""
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES."""
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Print the archive's contents with TarFile.list()."""
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None."""
        return

    def getinfo(self, name):
        """Return the member called name."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the contents of the member called name."""
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file filename to the archive as arcname.
           compress_type is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add bytes to the archive as the member described by zinfo,
           whose name, size and mtime are set from its filename,
           file_size and date_time attributes.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        zinfo.name = zinfo.filename
        zinfo.size = zinfo.file_size
        zinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(zinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
|
2157 #class TarFileCompat |
|
2158 |
|
2159 #-------------------- |
|
2160 # exported functions |
|
2161 #-------------------- |
|
def is_tarfile(name):
    """Return True if name points to a tar archive that we
       are able to handle, else return False.
    """
    # Opening (and closing) the archive is the test: a TarError
    # means that it cannot be handled.
    try:
        open(name).close()
    except TarError:
        return False
    return True

open = TarFile.open