python-2.5.2/win32/Lib/tarfile.py
changeset 0 ae805ac0140d
equal deleted inserted replaced
-1:000000000000 0:ae805ac0140d
       
     1 #!/usr/bin/env python
       
     2 # -*- coding: iso-8859-1 -*-
       
     3 #-------------------------------------------------------------------
       
     4 # tarfile.py
       
     5 #-------------------------------------------------------------------
       
     6 # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
       
     7 # All rights reserved.
       
     8 #
       
     9 # Permission  is  hereby granted,  free  of charge,  to  any person
       
    10 # obtaining a  copy of  this software  and associated documentation
       
    11 # files  (the  "Software"),  to   deal  in  the  Software   without
       
    12 # restriction,  including  without limitation  the  rights to  use,
       
    13 # copy, modify, merge, publish, distribute, sublicense, and/or sell
       
    14 # copies  of  the  Software,  and to  permit  persons  to  whom the
       
    15 # Software  is  furnished  to  do  so,  subject  to  the  following
       
    16 # conditions:
       
    17 #
       
    18 # The above copyright  notice and this  permission notice shall  be
       
    19 # included in all copies or substantial portions of the Software.
       
    20 #
       
    21 # THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
       
    22 # EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
       
    23 # OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
       
    24 # NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
       
    25 # HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
       
    26 # WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
       
    27 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
       
    28 # OTHER DEALINGS IN THE SOFTWARE.
       
    29 #
       
    30 """Read from and write to tar format archives.
       
    31 """
       
    32 
       
    33 __version__ = "$Revision: 60730 $"
       
    34 # $Source$
       
    35 
       
    36 version     = "0.8.0"
       
    37 __author__  = "Lars Gustäbel (lars@gustaebel.de)"
       
    38 __date__    = "$Date: 2008-02-11 19:36:07 +0100 (Mo, 11 Feb 2008) $"
       
    39 __cvsid__   = "$Id: tarfile.py 60730 2008-02-11 18:36:07Z lars.gustaebel $"
       
    40 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
       
    41 
       
    42 #---------
       
    43 # Imports
       
    44 #---------
       
    45 import sys
       
    46 import os
       
    47 import shutil
       
    48 import stat
       
    49 import errno
       
    50 import time
       
    51 import struct
       
    52 import copy
       
    53 
       
    54 if sys.platform == 'mac':
       
    55     # This module needs work for MacOS9, especially in the area of pathname
       
    56     # handling. In many places it is assumed a simple substitution of / by the
       
    57     # local os.path.sep is good enough to convert pathnames, but this does not
       
    58     # work with the mac rooted:path:name versus :nonrooted:path:name syntax
       
    59     raise ImportError, "tarfile does not work for platform==mac"
       
    60 
       
    61 try:
       
    62     import grp, pwd
       
    63 except ImportError:
       
    64     grp = pwd = None
       
    65 
       
    66 # from tarfile import *
       
    67 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
       
    68 
       
    69 #---------------------------------------------------------
       
    70 # tar constants
       
    71 #---------------------------------------------------------
       
NUL        = "\0"               # the null character (pads and terminates fields)
BLOCKSIZE  = 512                # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records (20 blocks)
MAGIC      = "ustar"            # magic tar string
VERSION    = "00"               # version number

LENGTH_NAME    = 100            # maximum length of a filename
LENGTH_LINK    = 100            # maximum length of a linkname
LENGTH_PREFIX  = 155            # maximum length of the prefix field
MAXSIZE_MEMBER = 077777777777L  # maximum size of a file (11 octal digits)

# Type flags: single-character codes identifying the kind of a member.
REGTYPE  = "0"                  # regular file
AREGTYPE = "\0"                 # regular file (NUL type flag)
LNKTYPE  = "1"                  # link (inside tarfile)
SYMTYPE  = "2"                  # symbolic link
CHRTYPE  = "3"                  # character special device
BLKTYPE  = "4"                  # block special device
DIRTYPE  = "5"                  # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

# GNU tar extension type flags.
GNUTYPE_LONGNAME = "L"          # GNU tar extension for longnames
GNUTYPE_LONGLINK = "K"          # GNU tar extension for longlink
GNUTYPE_SPARSE   = "S"          # GNU tar extension for sparse file
       
    96 
       
    97 #---------------------------------------------------------
       
    98 # tarfile constants
       
    99 #---------------------------------------------------------
       
# Every type flag defined above, i.e. all member kinds this module handles.
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,  # file types that tarfile
                   SYMTYPE, DIRTYPE, FIFOTYPE,  # can cope with.
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# Subset of the type flags whose members carry file data like a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,             # file types that somehow
                 CONTTYPE, GNUTYPE_SPARSE)      # represent regular files
       
   108 
       
   109 #---------------------------------------------------------
       
   110 # Bits used in the mode field, values in octal.
       
   111 #---------------------------------------------------------
       
# File type bits.
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

# Special permission bits.
TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved (rendered as "t"/"T" by filemode())

# Owner / group / other permission bits.
TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other
       
   132 
       
   133 #---------------------------------------------------------
       
   134 # Some useful functions
       
   135 #---------------------------------------------------------
       
   136 
       
   137 def stn(s, length):
       
   138     """Convert a python string to a null-terminated string buffer.
       
   139     """
       
   140     return s[:length] + (length - len(s)) * NUL
       
   141 
       
   142 def nts(s):
       
   143     """Convert a null-terminated string field to a python string.
       
   144     """
       
   145     # Use the string up to the first null char.
       
   146     p = s.find("\0")
       
   147     if p == -1:
       
   148         return s
       
   149     return s[:p]
       
   150 
       
   151 def nti(s):
       
   152     """Convert a number field to a python number.
       
   153     """
       
   154     # There are two possible encodings for a number field, see
       
   155     # itn() below.
       
   156     if s[0] != chr(0200):
       
   157         n = int(nts(s) or "0", 8)
       
   158     else:
       
   159         n = 0L
       
   160         for i in xrange(len(s) - 1):
       
   161             n <<= 8
       
   162             n += ord(s[i + 1])
       
   163     return n
       
   164 
       
   165 def itn(n, digits=8, posix=False):
       
   166     """Convert a python number to a number field.
       
   167     """
       
   168     # POSIX 1003.1-1988 requires numbers to be encoded as a string of
       
   169     # octal digits followed by a null-byte, this allows values up to
       
   170     # (8**(digits-1))-1. GNU tar allows storing numbers greater than
       
   171     # that if necessary. A leading 0200 byte indicates this particular
       
   172     # encoding, the following digits-1 bytes are a big-endian
       
   173     # representation. This allows values up to (256**(digits-1))-1.
       
   174     if 0 <= n < 8 ** (digits - 1):
       
   175         s = "%0*o" % (digits - 1, n) + NUL
       
   176     else:
       
   177         if posix:
       
   178             raise ValueError("overflow in number field")
       
   179 
       
   180         if n < 0:
       
   181             # XXX We mimic GNU tar's behaviour with negative numbers,
       
   182             # this could raise OverflowError.
       
   183             n = struct.unpack("L", struct.pack("l", n))[0]
       
   184 
       
   185         s = ""
       
   186         for i in xrange(digits - 1):
       
   187             s = chr(n & 0377) + s
       
   188             n >>= 8
       
   189         s = chr(0200) + s
       
   190     return s
       
   191 
       
   192 def calc_chksums(buf):
       
   193     """Calculate the checksum for a member's header by summing up all
       
   194        characters except for the chksum field which is treated as if
       
   195        it was filled with spaces. According to the GNU tar sources,
       
   196        some tars (Sun and NeXT) calculate chksum with signed char,
       
   197        which will be different if there are chars in the buffer with
       
   198        the high bit set. So we calculate two checksums, unsigned and
       
   199        signed.
       
   200     """
       
   201     unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
       
   202     signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
       
   203     return unsigned_chksum, signed_chksum
       
   204 
       
   205 def copyfileobj(src, dst, length=None):
       
   206     """Copy length bytes from fileobj src to fileobj dst.
       
   207        If length is None, copy the entire content.
       
   208     """
       
   209     if length == 0:
       
   210         return
       
   211     if length is None:
       
   212         shutil.copyfileobj(src, dst)
       
   213         return
       
   214 
       
   215     BUFSIZE = 16 * 1024
       
   216     blocks, remainder = divmod(length, BUFSIZE)
       
   217     for b in xrange(blocks):
       
   218         buf = src.read(BUFSIZE)
       
   219         if len(buf) < BUFSIZE:
       
   220             raise IOError("end of file reached")
       
   221         dst.write(buf)
       
   222 
       
   223     if remainder != 0:
       
   224         buf = src.read(remainder)
       
   225         if len(buf) < remainder:
       
   226             raise IOError("end of file reached")
       
   227         dst.write(buf)
       
   228     return
       
   229 
       
# Lookup table for filemode().  There is one inner tuple per character of
# the resulting string (file type, then rwx for owner, group and other).
# Each inner tuple lists (bit mask, character) pairs that filemode() tries
# in order, taking the first mask that is fully set in the mode.  For that
# reason masks that contain other masks must come first: S_IFLNK before
# S_IFREG, S_IFBLK before S_IFDIR and S_IFCHR, and the combined
# execute-plus-special-bit masks before the single bits.
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    # owner
    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    # group
    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    # other
    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
       
   256 
       
   257 def filemode(mode):
       
   258     """Convert a file's mode to a string of the form
       
   259        -rwxrwxrwx.
       
   260        Used by TarFile.list()
       
   261     """
       
   262     perm = []
       
   263     for table in filemode_table:
       
   264         for bit, char in table:
       
   265             if mode & bit == bit:
       
   266                 perm.append(char)
       
   267                 break
       
   268         else:
       
   269             perm.append("-")
       
   270     return "".join(perm)
       
   271 
       
   272 if os.sep != "/":
       
   273     normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
       
   274 else:
       
   275     normpath = os.path.normpath
       
   276 
       
   277 class TarError(Exception):
       
   278     """Base exception."""
       
   279     pass
       
   280 class ExtractError(TarError):
       
   281     """General exception for extract errors."""
       
   282     pass
       
   283 class ReadError(TarError):
       
   284     """Exception for unreadble tar archives."""
       
   285     pass
       
   286 class CompressionError(TarError):
       
   287     """Exception for unavailable compression methods."""
       
   288     pass
       
   289 class StreamError(TarError):
       
   290     """Exception for unsupported operations on stream-like TarFiles."""
       
   291     pass
       
   292 
       
   293 #---------------------------
       
   294 # internal stream interface
       
   295 #---------------------------
       
   296 class _LowLevelFile:
       
   297     """Low-level file object. Supports reading and writing.
       
   298        It is used instead of a regular file object for streaming
       
   299        access.
       
   300     """
       
   301 
       
   302     def __init__(self, name, mode):
       
   303         mode = {
       
   304             "r": os.O_RDONLY,
       
   305             "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
       
   306         }[mode]
       
   307         if hasattr(os, "O_BINARY"):
       
   308             mode |= os.O_BINARY
       
   309         self.fd = os.open(name, mode)
       
   310 
       
   311     def close(self):
       
   312         os.close(self.fd)
       
   313 
       
   314     def read(self, size):
       
   315         return os.read(self.fd, size)
       
   316 
       
   317     def write(self, s):
       
   318         os.write(self.fd, s)
       
   319 
       
   320 class _Stream:
       
   321     """Class that serves as an adapter between TarFile and
       
   322        a stream-like object.  The stream-like object only
       
   323        needs to have a read() or write() method and is accessed
       
   324        blockwise.  Use of gzip or bzip2 compression is possible.
       
   325        A stream-like object could be for example: sys.stdin,
       
   326        sys.stdout, a socket, a tape device etc.
       
   327 
       
   328        _Stream is intended to be used only internally.
       
   329     """
       
   330 
       
   331     def __init__(self, name, mode, comptype, fileobj, bufsize):
       
   332         """Construct a _Stream object.
       
   333         """
       
   334         self._extfileobj = True
       
   335         if fileobj is None:
       
   336             fileobj = _LowLevelFile(name, mode)
       
   337             self._extfileobj = False
       
   338 
       
   339         if comptype == '*':
       
   340             # Enable transparent compression detection for the
       
   341             # stream interface
       
   342             fileobj = _StreamProxy(fileobj)
       
   343             comptype = fileobj.getcomptype()
       
   344 
       
   345         self.name     = name or ""
       
   346         self.mode     = mode
       
   347         self.comptype = comptype
       
   348         self.fileobj  = fileobj
       
   349         self.bufsize  = bufsize
       
   350         self.buf      = ""
       
   351         self.pos      = 0L
       
   352         self.closed   = False
       
   353 
       
   354         if comptype == "gz":
       
   355             try:
       
   356                 import zlib
       
   357             except ImportError:
       
   358                 raise CompressionError("zlib module is not available")
       
   359             self.zlib = zlib
       
   360             self.crc = zlib.crc32("")
       
   361             if mode == "r":
       
   362                 self._init_read_gz()
       
   363             else:
       
   364                 self._init_write_gz()
       
   365 
       
   366         if comptype == "bz2":
       
   367             try:
       
   368                 import bz2
       
   369             except ImportError:
       
   370                 raise CompressionError("bz2 module is not available")
       
   371             if mode == "r":
       
   372                 self.dbuf = ""
       
   373                 self.cmp = bz2.BZ2Decompressor()
       
   374             else:
       
   375                 self.cmp = bz2.BZ2Compressor()
       
   376 
       
   377     def __del__(self):
       
   378         if hasattr(self, "closed") and not self.closed:
       
   379             self.close()
       
   380 
       
   381     def _init_write_gz(self):
       
   382         """Initialize for writing with gzip compression.
       
   383         """
       
   384         self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
       
   385                                             -self.zlib.MAX_WBITS,
       
   386                                             self.zlib.DEF_MEM_LEVEL,
       
   387                                             0)
       
   388         timestamp = struct.pack("<L", long(time.time()))
       
   389         self.__write("\037\213\010\010%s\002\377" % timestamp)
       
   390         if self.name.endswith(".gz"):
       
   391             self.name = self.name[:-3]
       
   392         self.__write(self.name + NUL)
       
   393 
       
   394     def write(self, s):
       
   395         """Write string s to the stream.
       
   396         """
       
   397         if self.comptype == "gz":
       
   398             self.crc = self.zlib.crc32(s, self.crc)
       
   399         self.pos += len(s)
       
   400         if self.comptype != "tar":
       
   401             s = self.cmp.compress(s)
       
   402         self.__write(s)
       
   403 
       
   404     def __write(self, s):
       
   405         """Write string s to the stream if a whole new block
       
   406            is ready to be written.
       
   407         """
       
   408         self.buf += s
       
   409         while len(self.buf) > self.bufsize:
       
   410             self.fileobj.write(self.buf[:self.bufsize])
       
   411             self.buf = self.buf[self.bufsize:]
       
   412 
       
   413     def close(self):
       
   414         """Close the _Stream object. No operation should be
       
   415            done on it afterwards.
       
   416         """
       
   417         if self.closed:
       
   418             return
       
   419 
       
   420         if self.mode == "w" and self.comptype != "tar":
       
   421             self.buf += self.cmp.flush()
       
   422 
       
   423         if self.mode == "w" and self.buf:
       
   424             self.fileobj.write(self.buf)
       
   425             self.buf = ""
       
   426             if self.comptype == "gz":
       
   427                 # The native zlib crc is an unsigned 32-bit integer, but
       
   428                 # the Python wrapper implicitly casts that to a signed C
       
   429                 # long.  So, on a 32-bit box self.crc may "look negative",
       
   430                 # while the same crc on a 64-bit box may "look positive".
       
   431                 # To avoid irksome warnings from the `struct` module, force
       
   432                 # it to look positive on all boxes.
       
   433                 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
       
   434                 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
       
   435 
       
   436         if not self._extfileobj:
       
   437             self.fileobj.close()
       
   438 
       
   439         self.closed = True
       
   440 
       
   441     def _init_read_gz(self):
       
   442         """Initialize for reading a gzip compressed fileobj.
       
   443         """
       
   444         self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
       
   445         self.dbuf = ""
       
   446 
       
   447         # taken from gzip.GzipFile with some alterations
       
   448         if self.__read(2) != "\037\213":
       
   449             raise ReadError("not a gzip file")
       
   450         if self.__read(1) != "\010":
       
   451             raise CompressionError("unsupported compression method")
       
   452 
       
   453         flag = ord(self.__read(1))
       
   454         self.__read(6)
       
   455 
       
   456         if flag & 4:
       
   457             xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
       
   458             self.read(xlen)
       
   459         if flag & 8:
       
   460             while True:
       
   461                 s = self.__read(1)
       
   462                 if not s or s == NUL:
       
   463                     break
       
   464         if flag & 16:
       
   465             while True:
       
   466                 s = self.__read(1)
       
   467                 if not s or s == NUL:
       
   468                     break
       
   469         if flag & 2:
       
   470             self.__read(2)
       
   471 
       
   472     def tell(self):
       
   473         """Return the stream's file pointer position.
       
   474         """
       
   475         return self.pos
       
   476 
       
   477     def seek(self, pos=0):
       
   478         """Set the stream's file pointer to pos. Negative seeking
       
   479            is forbidden.
       
   480         """
       
   481         if pos - self.pos >= 0:
       
   482             blocks, remainder = divmod(pos - self.pos, self.bufsize)
       
   483             for i in xrange(blocks):
       
   484                 self.read(self.bufsize)
       
   485             self.read(remainder)
       
   486         else:
       
   487             raise StreamError("seeking backwards is not allowed")
       
   488         return self.pos
       
   489 
       
   490     def read(self, size=None):
       
   491         """Return the next size number of bytes from the stream.
       
   492            If size is not defined, return all bytes of the stream
       
   493            up to EOF.
       
   494         """
       
   495         if size is None:
       
   496             t = []
       
   497             while True:
       
   498                 buf = self._read(self.bufsize)
       
   499                 if not buf:
       
   500                     break
       
   501                 t.append(buf)
       
   502             buf = "".join(t)
       
   503         else:
       
   504             buf = self._read(size)
       
   505         self.pos += len(buf)
       
   506         return buf
       
   507 
       
   508     def _read(self, size):
       
   509         """Return size bytes from the stream.
       
   510         """
       
   511         if self.comptype == "tar":
       
   512             return self.__read(size)
       
   513 
       
   514         c = len(self.dbuf)
       
   515         t = [self.dbuf]
       
   516         while c < size:
       
   517             buf = self.__read(self.bufsize)
       
   518             if not buf:
       
   519                 break
       
   520             buf = self.cmp.decompress(buf)
       
   521             t.append(buf)
       
   522             c += len(buf)
       
   523         t = "".join(t)
       
   524         self.dbuf = t[size:]
       
   525         return t[:size]
       
   526 
       
   527     def __read(self, size):
       
   528         """Return size bytes from stream. If internal buffer is empty,
       
   529            read another block from the stream.
       
   530         """
       
   531         c = len(self.buf)
       
   532         t = [self.buf]
       
   533         while c < size:
       
   534             buf = self.fileobj.read(self.bufsize)
       
   535             if not buf:
       
   536                 break
       
   537             t.append(buf)
       
   538             c += len(buf)
       
   539         t = "".join(t)
       
   540         self.buf = t[size:]
       
   541         return t[:size]
       
   542 # class _Stream
       
   543 
       
   544 class _StreamProxy(object):
       
   545     """Small proxy class that enables transparent compression
       
   546        detection for the Stream interface (mode 'r|*').
       
   547     """
       
   548 
       
   549     def __init__(self, fileobj):
       
   550         self.fileobj = fileobj
       
   551         self.buf = self.fileobj.read(BLOCKSIZE)
       
   552 
       
   553     def read(self, size):
       
   554         self.read = self.fileobj.read
       
   555         return self.buf
       
   556 
       
   557     def getcomptype(self):
       
   558         if self.buf.startswith("\037\213\010"):
       
   559             return "gz"
       
   560         if self.buf.startswith("BZh91"):
       
   561             return "bz2"
       
   562         return "tar"
       
   563 
       
   564     def close(self):
       
   565         self.fileobj.close()
       
   566 # class StreamProxy
       
   567 
       
   568 class _BZ2Proxy(object):
       
   569     """Small proxy class that enables external file object
       
   570        support for "r:bz2" and "w:bz2" modes. This is actually
       
   571        a workaround for a limitation in bz2 module's BZ2File
       
   572        class which (unlike gzip.GzipFile) has no support for
       
   573        a file object argument.
       
   574     """
       
   575 
       
   576     blocksize = 16 * 1024
       
   577 
       
   578     def __init__(self, fileobj, mode):
       
   579         self.fileobj = fileobj
       
   580         self.mode = mode
       
   581         self.init()
       
   582 
       
   583     def init(self):
       
   584         import bz2
       
   585         self.pos = 0
       
   586         if self.mode == "r":
       
   587             self.bz2obj = bz2.BZ2Decompressor()
       
   588             self.fileobj.seek(0)
       
   589             self.buf = ""
       
   590         else:
       
   591             self.bz2obj = bz2.BZ2Compressor()
       
   592 
       
   593     def read(self, size):
       
   594         b = [self.buf]
       
   595         x = len(self.buf)
       
   596         while x < size:
       
   597             try:
       
   598                 raw = self.fileobj.read(self.blocksize)
       
   599                 data = self.bz2obj.decompress(raw)
       
   600                 b.append(data)
       
   601             except EOFError:
       
   602                 break
       
   603             x += len(data)
       
   604         self.buf = "".join(b)
       
   605 
       
   606         buf = self.buf[:size]
       
   607         self.buf = self.buf[size:]
       
   608         self.pos += len(buf)
       
   609         return buf
       
   610 
       
   611     def seek(self, pos):
       
   612         if pos < self.pos:
       
   613             self.init()
       
   614         self.read(pos - self.pos)
       
   615 
       
   616     def tell(self):
       
   617         return self.pos
       
   618 
       
   619     def write(self, data):
       
   620         self.pos += len(data)
       
   621         raw = self.bz2obj.compress(data)
       
   622         self.fileobj.write(raw)
       
   623 
       
   624     def close(self):
       
   625         if self.mode == "w":
       
   626             raw = self.bz2obj.flush()
       
   627             self.fileobj.write(raw)
       
   628         self.fileobj.close()
       
   629 # class _BZ2Proxy
       
   630 
       
   631 #------------------------
       
   632 # Extraction file object
       
   633 #------------------------
       
   634 class _FileInFile(object):
       
   635     """A thin wrapper around an existing file object that
       
   636        provides a part of its data as an individual file
       
   637        object.
       
   638     """
       
   639 
       
   640     def __init__(self, fileobj, offset, size, sparse=None):
       
   641         self.fileobj = fileobj
       
   642         self.offset = offset
       
   643         self.size = size
       
   644         self.sparse = sparse
       
   645         self.position = 0
       
   646 
       
   647     def tell(self):
       
   648         """Return the current file position.
       
   649         """
       
   650         return self.position
       
   651 
       
   652     def seek(self, position):
       
   653         """Seek to a position in the file.
       
   654         """
       
   655         self.position = position
       
   656 
       
   657     def read(self, size=None):
       
   658         """Read data from the file.
       
   659         """
       
   660         if size is None:
       
   661             size = self.size - self.position
       
   662         else:
       
   663             size = min(size, self.size - self.position)
       
   664 
       
   665         if self.sparse is None:
       
   666             return self.readnormal(size)
       
   667         else:
       
   668             return self.readsparse(size)
       
   669 
       
   670     def readnormal(self, size):
       
   671         """Read operation for regular files.
       
   672         """
       
   673         self.fileobj.seek(self.offset + self.position)
       
   674         self.position += size
       
   675         return self.fileobj.read(size)
       
   676 
       
   677     def readsparse(self, size):
       
   678         """Read operation for sparse files.
       
   679         """
       
   680         data = []
       
   681         while size > 0:
       
   682             buf = self.readsparsesection(size)
       
   683             if not buf:
       
   684                 break
       
   685             size -= len(buf)
       
   686             data.append(buf)
       
   687         return "".join(data)
       
   688 
       
   689     def readsparsesection(self, size):
       
   690         """Read a single section of a sparse file.
       
   691         """
       
   692         section = self.sparse.find(self.position)
       
   693 
       
   694         if section is None:
       
   695             return ""
       
   696 
       
   697         size = min(size, section.offset + section.size - self.position)
       
   698 
       
   699         if isinstance(section, _data):
       
   700             realpos = section.realpos + self.position - section.offset
       
   701             self.fileobj.seek(self.offset + realpos)
       
   702             self.position += size
       
   703             return self.fileobj.read(size)
       
   704         else:
       
   705             self.position += size
       
   706             return NUL * size
       
   707 #class _FileInFile
       
   708 
       
   709 
       
   710 class ExFileObject(object):
       
   711     """File-like object for reading an archive member.
       
   712        Is returned by TarFile.extractfile().
       
   713     """
       
   714     blocksize = 1024
       
   715 
       
   716     def __init__(self, tarfile, tarinfo):
       
   717         self.fileobj = _FileInFile(tarfile.fileobj,
       
   718                                    tarinfo.offset_data,
       
   719                                    tarinfo.size,
       
   720                                    getattr(tarinfo, "sparse", None))
       
   721         self.name = tarinfo.name
       
   722         self.mode = "r"
       
   723         self.closed = False
       
   724         self.size = tarinfo.size
       
   725 
       
   726         self.position = 0
       
   727         self.buffer = ""
       
   728 
       
   729     def read(self, size=None):
       
   730         """Read at most size bytes from the file. If size is not
       
   731            present or None, read all data until EOF is reached.
       
   732         """
       
   733         if self.closed:
       
   734             raise ValueError("I/O operation on closed file")
       
   735 
       
   736         buf = ""
       
   737         if self.buffer:
       
   738             if size is None:
       
   739                 buf = self.buffer
       
   740                 self.buffer = ""
       
   741             else:
       
   742                 buf = self.buffer[:size]
       
   743                 self.buffer = self.buffer[size:]
       
   744 
       
   745         if size is None:
       
   746             buf += self.fileobj.read()
       
   747         else:
       
   748             buf += self.fileobj.read(size - len(buf))
       
   749 
       
   750         self.position += len(buf)
       
   751         return buf
       
   752 
       
   753     def readline(self, size=-1):
       
   754         """Read one entire line from the file. If size is present
       
   755            and non-negative, return a string with at most that
       
   756            size, which may be an incomplete line.
       
   757         """
       
   758         if self.closed:
       
   759             raise ValueError("I/O operation on closed file")
       
   760 
       
   761         if "\n" in self.buffer:
       
   762             pos = self.buffer.find("\n") + 1
       
   763         else:
       
   764             buffers = [self.buffer]
       
   765             while True:
       
   766                 buf = self.fileobj.read(self.blocksize)
       
   767                 buffers.append(buf)
       
   768                 if not buf or "\n" in buf:
       
   769                     self.buffer = "".join(buffers)
       
   770                     pos = self.buffer.find("\n") + 1
       
   771                     if pos == 0:
       
   772                         # no newline found.
       
   773                         pos = len(self.buffer)
       
   774                     break
       
   775 
       
   776         if size != -1:
       
   777             pos = min(size, pos)
       
   778 
       
   779         buf = self.buffer[:pos]
       
   780         self.buffer = self.buffer[pos:]
       
   781         self.position += len(buf)
       
   782         return buf
       
   783 
       
   784     def readlines(self):
       
   785         """Return a list with all remaining lines.
       
   786         """
       
   787         result = []
       
   788         while True:
       
   789             line = self.readline()
       
   790             if not line: break
       
   791             result.append(line)
       
   792         return result
       
   793 
       
   794     def tell(self):
       
   795         """Return the current file position.
       
   796         """
       
   797         if self.closed:
       
   798             raise ValueError("I/O operation on closed file")
       
   799 
       
   800         return self.position
       
   801 
       
   802     def seek(self, pos, whence=os.SEEK_SET):
       
   803         """Seek to a position in the file.
       
   804         """
       
   805         if self.closed:
       
   806             raise ValueError("I/O operation on closed file")
       
   807 
       
   808         if whence == os.SEEK_SET:
       
   809             self.position = min(max(pos, 0), self.size)
       
   810         elif whence == os.SEEK_CUR:
       
   811             if pos < 0:
       
   812                 self.position = max(self.position + pos, 0)
       
   813             else:
       
   814                 self.position = min(self.position + pos, self.size)
       
   815         elif whence == os.SEEK_END:
       
   816             self.position = max(min(self.size + pos, self.size), 0)
       
   817         else:
       
   818             raise ValueError("Invalid argument")
       
   819 
       
   820         self.buffer = ""
       
   821         self.fileobj.seek(self.position)
       
   822 
       
   823     def close(self):
       
   824         """Close the file object.
       
   825         """
       
   826         self.closed = True
       
   827 
       
   828     def __iter__(self):
       
   829         """Get an iterator over the file's lines.
       
   830         """
       
   831         while True:
       
   832             line = self.readline()
       
   833             if not line:
       
   834                 break
       
   835             yield line
       
   836 #class ExFileObject
       
   837 
       
   838 #------------------
       
   839 # Exported Classes
       
   840 #------------------
       
   841 class TarInfo(object):
       
   842     """Informational class which holds the details about an
       
   843        archive member given by a tar header block.
       
   844        TarInfo objects are returned by TarFile.getmember(),
       
   845        TarFile.getmembers() and TarFile.gettarinfo() and are
       
   846        usually created internally.
       
   847     """
       
   848 
       
   849     def __init__(self, name=""):
       
   850         """Construct a TarInfo object. name is the optional name
       
   851            of the member.
       
   852         """
       
   853         self.name = name        # member name (dirnames must end with '/')
       
   854         self.mode = 0666        # file permissions
       
   855         self.uid = 0            # user id
       
   856         self.gid = 0            # group id
       
   857         self.size = 0           # file size
       
   858         self.mtime = 0          # modification time
       
   859         self.chksum = 0         # header checksum
       
   860         self.type = REGTYPE     # member type
       
   861         self.linkname = ""      # link name
       
   862         self.uname = "user"     # user name
       
   863         self.gname = "group"    # group name
       
   864         self.devmajor = 0       # device major number
       
   865         self.devminor = 0       # device minor number
       
   866 
       
   867         self.offset = 0         # the tar header starts here
       
   868         self.offset_data = 0    # the file's data starts here
       
   869 
       
   870     def __repr__(self):
       
   871         return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
       
   872 
       
   873     @classmethod
       
   874     def frombuf(cls, buf):
       
   875         """Construct a TarInfo object from a 512 byte string buffer.
       
   876         """
       
   877         if len(buf) != BLOCKSIZE:
       
   878             raise ValueError("truncated header")
       
   879         if buf.count(NUL) == BLOCKSIZE:
       
   880             raise ValueError("empty header")
       
   881 
       
   882         tarinfo = cls()
       
   883         tarinfo.buf = buf
       
   884         tarinfo.name = nts(buf[0:100])
       
   885         tarinfo.mode = nti(buf[100:108])
       
   886         tarinfo.uid = nti(buf[108:116])
       
   887         tarinfo.gid = nti(buf[116:124])
       
   888         tarinfo.size = nti(buf[124:136])
       
   889         tarinfo.mtime = nti(buf[136:148])
       
   890         tarinfo.chksum = nti(buf[148:156])
       
   891         tarinfo.type = buf[156:157]
       
   892         tarinfo.linkname = nts(buf[157:257])
       
   893         tarinfo.uname = nts(buf[265:297])
       
   894         tarinfo.gname = nts(buf[297:329])
       
   895         tarinfo.devmajor = nti(buf[329:337])
       
   896         tarinfo.devminor = nti(buf[337:345])
       
   897         prefix = nts(buf[345:500])
       
   898 
       
   899         if prefix and not tarinfo.issparse():
       
   900             tarinfo.name = prefix + "/" + tarinfo.name
       
   901 
       
   902         if tarinfo.chksum not in calc_chksums(buf):
       
   903             raise ValueError("invalid header")
       
   904         return tarinfo
       
   905 
       
   906     def tobuf(self, posix=False):
       
   907         """Return a tar header as a string of 512 byte blocks.
       
   908         """
       
   909         buf = ""
       
   910         type = self.type
       
   911         prefix = ""
       
   912 
       
   913         if self.name.endswith("/"):
       
   914             type = DIRTYPE
       
   915 
       
   916         if type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
       
   917             # Prevent "././@LongLink" from being normalized.
       
   918             name = self.name
       
   919         else:
       
   920             name = normpath(self.name)
       
   921 
       
   922         if type == DIRTYPE:
       
   923             # directories should end with '/'
       
   924             name += "/"
       
   925 
       
   926         linkname = self.linkname
       
   927         if linkname:
       
   928             # if linkname is empty we end up with a '.'
       
   929             linkname = normpath(linkname)
       
   930 
       
   931         if posix:
       
   932             if self.size > MAXSIZE_MEMBER:
       
   933                 raise ValueError("file is too large (>= 8 GB)")
       
   934 
       
   935             if len(self.linkname) > LENGTH_LINK:
       
   936                 raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
       
   937 
       
   938             if len(name) > LENGTH_NAME:
       
   939                 prefix = name[:LENGTH_PREFIX + 1]
       
   940                 while prefix and prefix[-1] != "/":
       
   941                     prefix = prefix[:-1]
       
   942 
       
   943                 name = name[len(prefix):]
       
   944                 prefix = prefix[:-1]
       
   945 
       
   946                 if not prefix or len(name) > LENGTH_NAME:
       
   947                     raise ValueError("name is too long")
       
   948 
       
   949         else:
       
   950             if len(self.linkname) > LENGTH_LINK:
       
   951                 buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)
       
   952 
       
   953             if len(name) > LENGTH_NAME:
       
   954                 buf += self._create_gnulong(name, GNUTYPE_LONGNAME)
       
   955 
       
   956         parts = [
       
   957             stn(name, 100),
       
   958             itn(self.mode & 07777, 8, posix),
       
   959             itn(self.uid, 8, posix),
       
   960             itn(self.gid, 8, posix),
       
   961             itn(self.size, 12, posix),
       
   962             itn(self.mtime, 12, posix),
       
   963             "        ", # checksum field
       
   964             type,
       
   965             stn(self.linkname, 100),
       
   966             stn(MAGIC, 6),
       
   967             stn(VERSION, 2),
       
   968             stn(self.uname, 32),
       
   969             stn(self.gname, 32),
       
   970             itn(self.devmajor, 8, posix),
       
   971             itn(self.devminor, 8, posix),
       
   972             stn(prefix, 155)
       
   973         ]
       
   974 
       
   975         buf += "".join(parts).ljust(BLOCKSIZE, NUL)
       
   976         chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
       
   977         buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
       
   978         self.buf = buf
       
   979         return buf
       
   980 
       
   981     def _create_gnulong(self, name, type):
       
   982         """Create a GNU longname/longlink header from name.
       
   983            It consists of an extended tar header, with the length
       
   984            of the longname as size, followed by data blocks,
       
   985            which contain the longname as a null terminated string.
       
   986         """
       
   987         name += NUL
       
   988 
       
   989         tarinfo = self.__class__()
       
   990         tarinfo.name = "././@LongLink"
       
   991         tarinfo.type = type
       
   992         tarinfo.mode = 0
       
   993         tarinfo.size = len(name)
       
   994 
       
   995         # create extended header
       
   996         buf = tarinfo.tobuf()
       
   997         # create name blocks
       
   998         buf += name
       
   999         blocks, remainder = divmod(len(name), BLOCKSIZE)
       
  1000         if remainder > 0:
       
  1001             buf += (BLOCKSIZE - remainder) * NUL
       
  1002         return buf
       
  1003 
       
  1004     def isreg(self):
       
  1005         return self.type in REGULAR_TYPES
       
  1006     def isfile(self):
       
  1007         return self.isreg()
       
  1008     def isdir(self):
       
  1009         return self.type == DIRTYPE
       
  1010     def issym(self):
       
  1011         return self.type == SYMTYPE
       
  1012     def islnk(self):
       
  1013         return self.type == LNKTYPE
       
  1014     def ischr(self):
       
  1015         return self.type == CHRTYPE
       
  1016     def isblk(self):
       
  1017         return self.type == BLKTYPE
       
  1018     def isfifo(self):
       
  1019         return self.type == FIFOTYPE
       
  1020     def issparse(self):
       
  1021         return self.type == GNUTYPE_SPARSE
       
  1022     def isdev(self):
       
  1023         return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
       
  1024 # class TarInfo
       
  1025 
       
  1026 class TarFile(object):
       
  1027     """The TarFile Class provides an interface to tar archives.
       
  1028     """
       
  1029 
       
  1030     debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
       
  1031 
       
  1032     dereference = False         # If true, add content of linked file to the
       
  1033                                 # tar file, else the link.
       
  1034 
       
  1035     ignore_zeros = False        # If true, skips empty or invalid blocks and
       
  1036                                 # continues processing.
       
  1037 
       
  1038     errorlevel = 0              # If 0, fatal errors only appear in debug
       
  1039                                 # messages (if debug >= 0). If > 0, errors
       
  1040                                 # are passed to the caller as exceptions.
       
  1041 
       
  1042     posix = False               # If True, generates POSIX.1-1990-compliant
       
  1043                                 # archives (no GNU extensions!)
       
  1044 
       
  1045     fileobject = ExFileObject
       
  1046 
       
  1047     def __init__(self, name=None, mode="r", fileobj=None):
       
  1048         """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       
  1049            read from an existing archive, 'a' to append data to an existing
       
  1050            file or 'w' to create a new file overwriting an existing one. `mode'
       
  1051            defaults to 'r'.
       
  1052            If `fileobj' is given, it is used for reading or writing data. If it
       
  1053            can be determined, `mode' is overridden by `fileobj's mode.
       
  1054            `fileobj' is not closed, when TarFile is closed.
       
  1055         """
       
  1056         if len(mode) > 1 or mode not in "raw":
       
  1057             raise ValueError("mode must be 'r', 'a' or 'w'")
       
  1058         self._mode = mode
       
  1059         self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
       
  1060 
       
  1061         if not fileobj:
       
  1062             fileobj = file(name, self.mode)
       
  1063             self._extfileobj = False
       
  1064         else:
       
  1065             if name is None and hasattr(fileobj, "name"):
       
  1066                 name = fileobj.name
       
  1067             if hasattr(fileobj, "mode"):
       
  1068                 self.mode = fileobj.mode
       
  1069             self._extfileobj = True
       
  1070         self.name = os.path.abspath(name) if name else None
       
  1071         self.fileobj = fileobj
       
  1072 
       
  1073         # Init datastructures
       
  1074         self.closed = False
       
  1075         self.members = []       # list of members as TarInfo objects
       
  1076         self._loaded = False    # flag if all members have been read
       
  1077         self.offset = self.fileobj.tell()
       
  1078                                 # current position in the archive file
       
  1079         self.inodes = {}        # dictionary caching the inodes of
       
  1080                                 # archive members already added
       
  1081 
       
  1082         if self._mode == "r":
       
  1083             self.firstmember = None
       
  1084             self.firstmember = self.next()
       
  1085 
       
  1086         if self._mode == "a":
       
  1087             # Move to the end of the archive,
       
  1088             # before the first empty block.
       
  1089             self.firstmember = None
       
  1090             while True:
       
  1091                 try:
       
  1092                     tarinfo = self.next()
       
  1093                 except ReadError:
       
  1094                     self.fileobj.seek(0)
       
  1095                     break
       
  1096                 if tarinfo is None:
       
  1097                     self.fileobj.seek(- BLOCKSIZE, 1)
       
  1098                     break
       
  1099 
       
  1100         if self._mode in "aw":
       
  1101             self._loaded = True
       
  1102 
       
  1103     #--------------------------------------------------------------------------
       
  1104     # Below are the classmethods which act as alternate constructors to the
       
  1105     # TarFile class. The open() method is the only one that is needed for
       
  1106     # public use; it is the "super"-constructor and is able to select an
       
  1107     # adequate "sub"-constructor for a particular compression using the mapping
       
  1108     # from OPEN_METH.
       
  1109     #
       
  1110     # This concept allows one to subclass TarFile without losing the comfort of
       
  1111     # the super-constructor. A sub-constructor is registered and made available
       
  1112     # by adding it to the mapping in OPEN_METH.
       
  1113 
       
  1114     @classmethod
       
  1115     def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512):
       
  1116         """Open a tar archive for reading, writing or appending. Return
       
  1117            an appropriate TarFile class.
       
  1118 
       
  1119            mode:
       
  1120            'r' or 'r:*' open for reading with transparent compression
       
  1121            'r:'         open for reading exclusively uncompressed
       
  1122            'r:gz'       open for reading with gzip compression
       
  1123            'r:bz2'      open for reading with bzip2 compression
       
  1124            'a' or 'a:'  open for appending
       
  1125            'w' or 'w:'  open for writing without compression
       
  1126            'w:gz'       open for writing with gzip compression
       
  1127            'w:bz2'      open for writing with bzip2 compression
       
  1128 
       
  1129            'r|*'        open a stream of tar blocks with transparent compression
       
  1130            'r|'         open an uncompressed stream of tar blocks for reading
       
  1131            'r|gz'       open a gzip compressed stream of tar blocks
       
  1132            'r|bz2'      open a bzip2 compressed stream of tar blocks
       
  1133            'w|'         open an uncompressed stream for writing
       
  1134            'w|gz'       open a gzip compressed stream for writing
       
  1135            'w|bz2'      open a bzip2 compressed stream for writing
       
  1136         """
       
  1137 
       
  1138         if not name and not fileobj:
       
  1139             raise ValueError("nothing to open")
       
  1140 
       
  1141         if mode in ("r", "r:*"):
       
  1142             # Find out which *open() is appropriate for opening the file.
       
  1143             for comptype in cls.OPEN_METH:
       
  1144                 func = getattr(cls, cls.OPEN_METH[comptype])
       
  1145                 if fileobj is not None:
       
  1146                     saved_pos = fileobj.tell()
       
  1147                 try:
       
  1148                     return func(name, "r", fileobj)
       
  1149                 except (ReadError, CompressionError):
       
  1150                     if fileobj is not None:
       
  1151                         fileobj.seek(saved_pos)
       
  1152                     continue
       
  1153             raise ReadError("file could not be opened successfully")
       
  1154 
       
  1155         elif ":" in mode:
       
  1156             filemode, comptype = mode.split(":", 1)
       
  1157             filemode = filemode or "r"
       
  1158             comptype = comptype or "tar"
       
  1159 
       
  1160             # Select the *open() function according to
       
  1161             # given compression.
       
  1162             if comptype in cls.OPEN_METH:
       
  1163                 func = getattr(cls, cls.OPEN_METH[comptype])
       
  1164             else:
       
  1165                 raise CompressionError("unknown compression type %r" % comptype)
       
  1166             return func(name, filemode, fileobj)
       
  1167 
       
  1168         elif "|" in mode:
       
  1169             filemode, comptype = mode.split("|", 1)
       
  1170             filemode = filemode or "r"
       
  1171             comptype = comptype or "tar"
       
  1172 
       
  1173             if filemode not in "rw":
       
  1174                 raise ValueError("mode must be 'r' or 'w'")
       
  1175 
       
  1176             t = cls(name, filemode,
       
  1177                     _Stream(name, filemode, comptype, fileobj, bufsize))
       
  1178             t._extfileobj = False
       
  1179             return t
       
  1180 
       
  1181         elif mode in "aw":
       
  1182             return cls.taropen(name, mode, fileobj)
       
  1183 
       
  1184         raise ValueError("undiscernible mode")
       
  1185 
       
  1186     @classmethod
       
  1187     def taropen(cls, name, mode="r", fileobj=None):
       
  1188         """Open uncompressed tar archive name for reading or writing.
       
  1189         """
       
  1190         if len(mode) > 1 or mode not in "raw":
       
  1191             raise ValueError("mode must be 'r', 'a' or 'w'")
       
  1192         return cls(name, mode, fileobj)
       
  1193 
       
  1194     @classmethod
       
  1195     def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9):
       
  1196         """Open gzip compressed tar archive name for reading or writing.
       
  1197            Appending is not allowed.
       
  1198         """
       
  1199         if len(mode) > 1 or mode not in "rw":
       
  1200             raise ValueError("mode must be 'r' or 'w'")
       
  1201 
       
  1202         try:
       
  1203             import gzip
       
  1204             gzip.GzipFile
       
  1205         except (ImportError, AttributeError):
       
  1206             raise CompressionError("gzip module is not available")
       
  1207 
       
  1208         if fileobj is None:
       
  1209             fileobj = file(name, mode + "b")
       
  1210 
       
  1211         try:
       
  1212             t = cls.taropen(name, mode,
       
  1213                 gzip.GzipFile(name, mode, compresslevel, fileobj))
       
  1214         except IOError:
       
  1215             raise ReadError("not a gzip file")
       
  1216         t._extfileobj = False
       
  1217         return t
       
  1218 
       
  1219     @classmethod
       
  1220     def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9):
       
  1221         """Open bzip2 compressed tar archive name for reading or writing.
       
  1222            Appending is not allowed.
       
  1223         """
       
  1224         if len(mode) > 1 or mode not in "rw":
       
  1225             raise ValueError("mode must be 'r' or 'w'.")
       
  1226 
       
  1227         try:
       
  1228             import bz2
       
  1229         except ImportError:
       
  1230             raise CompressionError("bz2 module is not available")
       
  1231 
       
  1232         if fileobj is not None:
       
  1233             fileobj = _BZ2Proxy(fileobj, mode)
       
  1234         else:
       
  1235             fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
       
  1236 
       
  1237         try:
       
  1238             t = cls.taropen(name, mode, fileobj)
       
  1239         except IOError:
       
  1240             raise ReadError("not a bzip2 file")
       
  1241         t._extfileobj = False
       
  1242         return t
       
  1243 
       
    # All *open() methods are registered here.
    # Maps a compression type, as used in the mode string of open(),
    # to the name of the classmethod that handles it. open() looks the
    # method up with getattr(), so subclasses can add entries.
    OPEN_METH = {
        "tar": "taropen",   # uncompressed tar
        "gz":  "gzopen",    # gzip compressed tar
        "bz2": "bz2open"    # bzip2 compressed tar
    }
       
  1250 
       
  1251     #--------------------------------------------------------------------------
       
  1252     # The public methods which TarFile provides:
       
  1253 
       
  1254     def close(self):
       
  1255         """Close the TarFile. In write-mode, two finishing zero blocks are
       
  1256            appended to the archive.
       
  1257         """
       
  1258         if self.closed:
       
  1259             return
       
  1260 
       
  1261         if self._mode in "aw":
       
  1262             self.fileobj.write(NUL * (BLOCKSIZE * 2))
       
  1263             self.offset += (BLOCKSIZE * 2)
       
  1264             # fill up the end with zero-blocks
       
  1265             # (like option -b20 for tar does)
       
  1266             blocks, remainder = divmod(self.offset, RECORDSIZE)
       
  1267             if remainder > 0:
       
  1268                 self.fileobj.write(NUL * (RECORDSIZE - remainder))
       
  1269 
       
  1270         if not self._extfileobj:
       
  1271             self.fileobj.close()
       
  1272         self.closed = True
       
  1273 
       
  1274     def getmember(self, name):
       
  1275         """Return a TarInfo object for member `name'. If `name' can not be
       
  1276            found in the archive, KeyError is raised. If a member occurs more
       
  1277            than once in the archive, its last occurence is assumed to be the
       
  1278            most up-to-date version.
       
  1279         """
       
  1280         tarinfo = self._getmember(name)
       
  1281         if tarinfo is None:
       
  1282             raise KeyError("filename %r not found" % name)
       
  1283         return tarinfo
       
  1284 
       
  1285     def getmembers(self):
       
  1286         """Return the members of the archive as a list of TarInfo objects. The
       
  1287            list has the same order as the members in the archive.
       
  1288         """
       
  1289         self._check()
       
  1290         if not self._loaded:    # if we want to obtain a list of
       
  1291             self._load()        # all members, we first have to
       
  1292                                 # scan the whole archive.
       
  1293         return self.members
       
  1294 
       
  1295     def getnames(self):
       
  1296         """Return the members of the archive as a list of their names. It has
       
  1297            the same order as the list returned by getmembers().
       
  1298         """
       
  1299         return [tarinfo.name for tarinfo in self.getmembers()]
       
  1300 
       
  1301     def gettarinfo(self, name=None, arcname=None, fileobj=None):
       
  1302         """Create a TarInfo object for either the file `name' or the file
       
  1303            object `fileobj' (using os.fstat on its file descriptor). You can
       
  1304            modify some of the TarInfo's attributes before you add it using
       
  1305            addfile(). If given, `arcname' specifies an alternative name for the
       
  1306            file in the archive.
       
  1307         """
       
  1308         self._check("aw")
       
  1309 
       
  1310         # When fileobj is given, replace name by
       
  1311         # fileobj's real name.
       
  1312         if fileobj is not None:
       
  1313             name = fileobj.name
       
  1314 
       
  1315         # Building the name of the member in the archive.
       
  1316         # Backward slashes are converted to forward slashes,
       
  1317         # Absolute paths are turned to relative paths.
       
  1318         if arcname is None:
       
  1319             arcname = name
       
  1320         arcname = normpath(arcname)
       
  1321         drv, arcname = os.path.splitdrive(arcname)
       
  1322         while arcname[0:1] == "/":
       
  1323             arcname = arcname[1:]
       
  1324 
       
  1325         # Now, fill the TarInfo object with
       
  1326         # information specific for the file.
       
  1327         tarinfo = TarInfo()
       
  1328 
       
  1329         # Use os.stat or os.lstat, depending on platform
       
  1330         # and if symlinks shall be resolved.
       
  1331         if fileobj is None:
       
  1332             if hasattr(os, "lstat") and not self.dereference:
       
  1333                 statres = os.lstat(name)
       
  1334             else:
       
  1335                 statres = os.stat(name)
       
  1336         else:
       
  1337             statres = os.fstat(fileobj.fileno())
       
  1338         linkname = ""
       
  1339 
       
  1340         stmd = statres.st_mode
       
  1341         if stat.S_ISREG(stmd):
       
  1342             inode = (statres.st_ino, statres.st_dev)
       
  1343             if not self.dereference and \
       
  1344                     statres.st_nlink > 1 and inode in self.inodes:
       
  1345                 # Is it a hardlink to an already
       
  1346                 # archived file?
       
  1347                 type = LNKTYPE
       
  1348                 linkname = self.inodes[inode]
       
  1349             else:
       
  1350                 # The inode is added only if its valid.
       
  1351                 # For win32 it is always 0.
       
  1352                 type = REGTYPE
       
  1353                 if inode[0]:
       
  1354                     self.inodes[inode] = arcname
       
  1355         elif stat.S_ISDIR(stmd):
       
  1356             type = DIRTYPE
       
  1357             if arcname[-1:] != "/":
       
  1358                 arcname += "/"
       
  1359         elif stat.S_ISFIFO(stmd):
       
  1360             type = FIFOTYPE
       
  1361         elif stat.S_ISLNK(stmd):
       
  1362             type = SYMTYPE
       
  1363             linkname = os.readlink(name)
       
  1364         elif stat.S_ISCHR(stmd):
       
  1365             type = CHRTYPE
       
  1366         elif stat.S_ISBLK(stmd):
       
  1367             type = BLKTYPE
       
  1368         else:
       
  1369             return None
       
  1370 
       
  1371         # Fill the TarInfo object with all
       
  1372         # information we can get.
       
  1373         tarinfo.name = arcname
       
  1374         tarinfo.mode = stmd
       
  1375         tarinfo.uid = statres.st_uid
       
  1376         tarinfo.gid = statres.st_gid
       
  1377         if stat.S_ISREG(stmd):
       
  1378             tarinfo.size = statres.st_size
       
  1379         else:
       
  1380             tarinfo.size = 0L
       
  1381         tarinfo.mtime = statres.st_mtime
       
  1382         tarinfo.type = type
       
  1383         tarinfo.linkname = linkname
       
  1384         if pwd:
       
  1385             try:
       
  1386                 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
       
  1387             except KeyError:
       
  1388                 pass
       
  1389         if grp:
       
  1390             try:
       
  1391                 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
       
  1392             except KeyError:
       
  1393                 pass
       
  1394 
       
  1395         if type in (CHRTYPE, BLKTYPE):
       
  1396             if hasattr(os, "major") and hasattr(os, "minor"):
       
  1397                 tarinfo.devmajor = os.major(statres.st_rdev)
       
  1398                 tarinfo.devminor = os.minor(statres.st_rdev)
       
  1399         return tarinfo
       
  1400 
       
  1401     def list(self, verbose=True):
       
  1402         """Print a table of contents to sys.stdout. If `verbose' is False, only
       
  1403            the names of the members are printed. If it is True, an `ls -l'-like
       
  1404            output is produced.
       
  1405         """
       
  1406         self._check()
       
  1407 
       
  1408         for tarinfo in self:
       
  1409             if verbose:
       
  1410                 print filemode(tarinfo.mode),
       
  1411                 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
       
  1412                                  tarinfo.gname or tarinfo.gid),
       
  1413                 if tarinfo.ischr() or tarinfo.isblk():
       
  1414                     print "%10s" % ("%d,%d" \
       
  1415                                     % (tarinfo.devmajor, tarinfo.devminor)),
       
  1416                 else:
       
  1417                     print "%10d" % tarinfo.size,
       
  1418                 print "%d-%02d-%02d %02d:%02d:%02d" \
       
  1419                       % time.localtime(tarinfo.mtime)[:6],
       
  1420 
       
  1421             print tarinfo.name,
       
  1422 
       
  1423             if verbose:
       
  1424                 if tarinfo.issym():
       
  1425                     print "->", tarinfo.linkname,
       
  1426                 if tarinfo.islnk():
       
  1427                     print "link to", tarinfo.linkname,
       
  1428             print
       
  1429 
       
  1430     def add(self, name, arcname=None, recursive=True):
       
  1431         """Add the file `name' to the archive. `name' may be any type of file
       
  1432            (directory, fifo, symbolic link, etc.). If given, `arcname'
       
  1433            specifies an alternative name for the file in the archive.
       
  1434            Directories are added recursively by default. This can be avoided by
       
  1435            setting `recursive' to False.
       
  1436         """
       
  1437         self._check("aw")
       
  1438 
       
  1439         if arcname is None:
       
  1440             arcname = name
       
  1441 
       
  1442         # Skip if somebody tries to archive the archive...
       
  1443         if self.name is not None and os.path.abspath(name) == self.name:
       
  1444             self._dbg(2, "tarfile: Skipped %r" % name)
       
  1445             return
       
  1446 
       
  1447         # Special case: The user wants to add the current
       
  1448         # working directory.
       
  1449         if name == ".":
       
  1450             if recursive:
       
  1451                 if arcname == ".":
       
  1452                     arcname = ""
       
  1453                 for f in os.listdir("."):
       
  1454                     self.add(f, os.path.join(arcname, f))
       
  1455             return
       
  1456 
       
  1457         self._dbg(1, name)
       
  1458 
       
  1459         # Create a TarInfo object from the file.
       
  1460         tarinfo = self.gettarinfo(name, arcname)
       
  1461 
       
  1462         if tarinfo is None:
       
  1463             self._dbg(1, "tarfile: Unsupported type %r" % name)
       
  1464             return
       
  1465 
       
  1466         # Append the tar header and data to the archive.
       
  1467         if tarinfo.isreg():
       
  1468             f = file(name, "rb")
       
  1469             self.addfile(tarinfo, f)
       
  1470             f.close()
       
  1471 
       
  1472         elif tarinfo.isdir():
       
  1473             self.addfile(tarinfo)
       
  1474             if recursive:
       
  1475                 for f in os.listdir(name):
       
  1476                     self.add(os.path.join(name, f), os.path.join(arcname, f))
       
  1477 
       
  1478         else:
       
  1479             self.addfile(tarinfo)
       
  1480 
       
  1481     def addfile(self, tarinfo, fileobj=None):
       
  1482         """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       
  1483            given, tarinfo.size bytes are read from it and added to the archive.
       
  1484            You can create TarInfo objects using gettarinfo().
       
  1485            On Windows platforms, `fileobj' should always be opened with mode
       
  1486            'rb' to avoid irritation about the file size.
       
  1487         """
       
  1488         self._check("aw")
       
  1489 
       
  1490         tarinfo = copy.copy(tarinfo)
       
  1491 
       
  1492         buf = tarinfo.tobuf(self.posix)
       
  1493         self.fileobj.write(buf)
       
  1494         self.offset += len(buf)
       
  1495 
       
  1496         # If there's data to follow, append it.
       
  1497         if fileobj is not None:
       
  1498             copyfileobj(fileobj, self.fileobj, tarinfo.size)
       
  1499             blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
       
  1500             if remainder > 0:
       
  1501                 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
       
  1502                 blocks += 1
       
  1503             self.offset += blocks * BLOCKSIZE
       
  1504 
       
  1505         self.members.append(tarinfo)
       
  1506 
       
  1507     def extractall(self, path=".", members=None):
       
  1508         """Extract all members from the archive to the current working
       
  1509            directory and set owner, modification time and permissions on
       
  1510            directories afterwards. `path' specifies a different directory
       
  1511            to extract to. `members' is optional and must be a subset of the
       
  1512            list returned by getmembers().
       
  1513         """
       
  1514         directories = []
       
  1515 
       
  1516         if members is None:
       
  1517             members = self
       
  1518 
       
  1519         for tarinfo in members:
       
  1520             if tarinfo.isdir():
       
  1521                 # Extract directories with a safe mode.
       
  1522                 directories.append(tarinfo)
       
  1523                 tarinfo = copy.copy(tarinfo)
       
  1524                 tarinfo.mode = 0700
       
  1525             self.extract(tarinfo, path)
       
  1526 
       
  1527         # Reverse sort directories.
       
  1528         directories.sort(lambda a, b: cmp(a.name, b.name))
       
  1529         directories.reverse()
       
  1530 
       
  1531         # Set correct owner, mtime and filemode on directories.
       
  1532         for tarinfo in directories:
       
  1533             dirpath = os.path.join(path, tarinfo.name)
       
  1534             try:
       
  1535                 self.chown(tarinfo, dirpath)
       
  1536                 self.utime(tarinfo, dirpath)
       
  1537                 self.chmod(tarinfo, dirpath)
       
  1538             except ExtractError, e:
       
  1539                 if self.errorlevel > 1:
       
  1540                     raise
       
  1541                 else:
       
  1542                     self._dbg(1, "tarfile: %s" % e)
       
  1543 
       
  1544     def extract(self, member, path=""):
       
  1545         """Extract a member from the archive to the current working directory,
       
  1546            using its full name. Its file information is extracted as accurately
       
  1547            as possible. `member' may be a filename or a TarInfo object. You can
       
  1548            specify a different directory using `path'.
       
  1549         """
       
  1550         self._check("r")
       
  1551 
       
  1552         if isinstance(member, TarInfo):
       
  1553             tarinfo = member
       
  1554         else:
       
  1555             tarinfo = self.getmember(member)
       
  1556 
       
  1557         # Prepare the link target for makelink().
       
  1558         if tarinfo.islnk():
       
  1559             tarinfo._link_target = os.path.join(path, tarinfo.linkname)
       
  1560 
       
  1561         try:
       
  1562             self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
       
  1563         except EnvironmentError, e:
       
  1564             if self.errorlevel > 0:
       
  1565                 raise
       
  1566             else:
       
  1567                 if e.filename is None:
       
  1568                     self._dbg(1, "tarfile: %s" % e.strerror)
       
  1569                 else:
       
  1570                     self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
       
  1571         except ExtractError, e:
       
  1572             if self.errorlevel > 1:
       
  1573                 raise
       
  1574             else:
       
  1575                 self._dbg(1, "tarfile: %s" % e)
       
  1576 
       
  1577     def extractfile(self, member):
       
  1578         """Extract a member from the archive as a file object. `member' may be
       
  1579            a filename or a TarInfo object. If `member' is a regular file, a
       
  1580            file-like object is returned. If `member' is a link, a file-like
       
  1581            object is constructed from the link's target. If `member' is none of
       
  1582            the above, None is returned.
       
  1583            The file-like object is read-only and provides the following
       
  1584            methods: read(), readline(), readlines(), seek() and tell()
       
  1585         """
       
  1586         self._check("r")
       
  1587 
       
  1588         if isinstance(member, TarInfo):
       
  1589             tarinfo = member
       
  1590         else:
       
  1591             tarinfo = self.getmember(member)
       
  1592 
       
  1593         if tarinfo.isreg():
       
  1594             return self.fileobject(self, tarinfo)
       
  1595 
       
  1596         elif tarinfo.type not in SUPPORTED_TYPES:
       
  1597             # If a member's type is unknown, it is treated as a
       
  1598             # regular file.
       
  1599             return self.fileobject(self, tarinfo)
       
  1600 
       
  1601         elif tarinfo.islnk() or tarinfo.issym():
       
  1602             if isinstance(self.fileobj, _Stream):
       
  1603                 # A small but ugly workaround for the case that someone tries
       
  1604                 # to extract a (sym)link as a file-object from a non-seekable
       
  1605                 # stream of tar blocks.
       
  1606                 raise StreamError("cannot extract (sym)link as file object")
       
  1607             else:
       
  1608                 # A (sym)link's file object is its target's file object.
       
  1609                 return self.extractfile(self._getmember(tarinfo.linkname,
       
  1610                                                         tarinfo))
       
  1611         else:
       
  1612             # If there's no data associated with the member (directory, chrdev,
       
  1613             # blkdev, etc.), return None instead of a file object.
       
  1614             return None
       
  1615 
       
  1616     def _extract_member(self, tarinfo, targetpath):
       
  1617         """Extract the TarInfo object tarinfo to a physical
       
  1618            file called targetpath.
       
  1619         """
       
  1620         # Fetch the TarInfo object for the given name
       
  1621         # and build the destination pathname, replacing
       
  1622         # forward slashes to platform specific separators.
       
  1623         if targetpath[-1:] == "/":
       
  1624             targetpath = targetpath[:-1]
       
  1625         targetpath = os.path.normpath(targetpath)
       
  1626 
       
  1627         # Create all upper directories.
       
  1628         upperdirs = os.path.dirname(targetpath)
       
  1629         if upperdirs and not os.path.exists(upperdirs):
       
  1630             # Create directories that are not part of the archive with
       
  1631             # default permissions.
       
  1632             os.makedirs(upperdirs)
       
  1633 
       
  1634         if tarinfo.islnk() or tarinfo.issym():
       
  1635             self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
       
  1636         else:
       
  1637             self._dbg(1, tarinfo.name)
       
  1638 
       
  1639         if tarinfo.isreg():
       
  1640             self.makefile(tarinfo, targetpath)
       
  1641         elif tarinfo.isdir():
       
  1642             self.makedir(tarinfo, targetpath)
       
  1643         elif tarinfo.isfifo():
       
  1644             self.makefifo(tarinfo, targetpath)
       
  1645         elif tarinfo.ischr() or tarinfo.isblk():
       
  1646             self.makedev(tarinfo, targetpath)
       
  1647         elif tarinfo.islnk() or tarinfo.issym():
       
  1648             self.makelink(tarinfo, targetpath)
       
  1649         elif tarinfo.type not in SUPPORTED_TYPES:
       
  1650             self.makeunknown(tarinfo, targetpath)
       
  1651         else:
       
  1652             self.makefile(tarinfo, targetpath)
       
  1653 
       
  1654         self.chown(tarinfo, targetpath)
       
  1655         if not tarinfo.issym():
       
  1656             self.chmod(tarinfo, targetpath)
       
  1657             self.utime(tarinfo, targetpath)
       
  1658 
       
  1659     #--------------------------------------------------------------------------
       
  1660     # Below are the different file methods. They are called via
       
  1661     # _extract_member() when extract() is called. They can be replaced in a
       
  1662     # subclass to implement other functionality.
       
  1663 
       
  1664     def makedir(self, tarinfo, targetpath):
       
  1665         """Make a directory called targetpath.
       
  1666         """
       
  1667         try:
       
  1668             # Use a safe mode for the directory, the real mode is set
       
  1669             # later in _extract_member().
       
  1670             os.mkdir(targetpath, 0700)
       
  1671         except EnvironmentError, e:
       
  1672             if e.errno != errno.EEXIST:
       
  1673                 raise
       
  1674 
       
  1675     def makefile(self, tarinfo, targetpath):
       
  1676         """Make a file called targetpath.
       
  1677         """
       
  1678         source = self.extractfile(tarinfo)
       
  1679         target = file(targetpath, "wb")
       
  1680         copyfileobj(source, target)
       
  1681         source.close()
       
  1682         target.close()
       
  1683 
       
  1684     def makeunknown(self, tarinfo, targetpath):
       
  1685         """Make a file from a TarInfo object with an unknown type
       
  1686            at targetpath.
       
  1687         """
       
  1688         self.makefile(tarinfo, targetpath)
       
  1689         self._dbg(1, "tarfile: Unknown file type %r, " \
       
  1690                      "extracted as regular file." % tarinfo.type)
       
  1691 
       
  1692     def makefifo(self, tarinfo, targetpath):
       
  1693         """Make a fifo called targetpath.
       
  1694         """
       
  1695         if hasattr(os, "mkfifo"):
       
  1696             os.mkfifo(targetpath)
       
  1697         else:
       
  1698             raise ExtractError("fifo not supported by system")
       
  1699 
       
  1700     def makedev(self, tarinfo, targetpath):
       
  1701         """Make a character or block device called targetpath.
       
  1702         """
       
  1703         if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
       
  1704             raise ExtractError("special devices not supported by system")
       
  1705 
       
  1706         mode = tarinfo.mode
       
  1707         if tarinfo.isblk():
       
  1708             mode |= stat.S_IFBLK
       
  1709         else:
       
  1710             mode |= stat.S_IFCHR
       
  1711 
       
  1712         os.mknod(targetpath, mode,
       
  1713                  os.makedev(tarinfo.devmajor, tarinfo.devminor))
       
  1714 
       
  1715     def makelink(self, tarinfo, targetpath):
       
  1716         """Make a (symbolic) link called targetpath. If it cannot be created
       
  1717           (platform limitation), we try to make a copy of the referenced file
       
  1718           instead of a link.
       
  1719         """
       
  1720         linkpath = tarinfo.linkname
       
  1721         try:
       
  1722             if tarinfo.issym():
       
  1723                 os.symlink(linkpath, targetpath)
       
  1724             else:
       
  1725                 # See extract().
       
  1726                 os.link(tarinfo._link_target, targetpath)
       
  1727         except AttributeError:
       
  1728             if tarinfo.issym():
       
  1729                 linkpath = os.path.join(os.path.dirname(tarinfo.name),
       
  1730                                         linkpath)
       
  1731                 linkpath = normpath(linkpath)
       
  1732 
       
  1733             try:
       
  1734                 self._extract_member(self.getmember(linkpath), targetpath)
       
  1735             except (EnvironmentError, KeyError), e:
       
  1736                 linkpath = os.path.normpath(linkpath)
       
  1737                 try:
       
  1738                     shutil.copy2(linkpath, targetpath)
       
  1739                 except EnvironmentError, e:
       
  1740                     raise IOError("link could not be created")
       
  1741 
       
  1742     def chown(self, tarinfo, targetpath):
       
  1743         """Set owner of targetpath according to tarinfo.
       
  1744         """
       
  1745         if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
       
  1746             # We have to be root to do so.
       
  1747             try:
       
  1748                 g = grp.getgrnam(tarinfo.gname)[2]
       
  1749             except KeyError:
       
  1750                 try:
       
  1751                     g = grp.getgrgid(tarinfo.gid)[2]
       
  1752                 except KeyError:
       
  1753                     g = os.getgid()
       
  1754             try:
       
  1755                 u = pwd.getpwnam(tarinfo.uname)[2]
       
  1756             except KeyError:
       
  1757                 try:
       
  1758                     u = pwd.getpwuid(tarinfo.uid)[2]
       
  1759                 except KeyError:
       
  1760                     u = os.getuid()
       
  1761             try:
       
  1762                 if tarinfo.issym() and hasattr(os, "lchown"):
       
  1763                     os.lchown(targetpath, u, g)
       
  1764                 else:
       
  1765                     if sys.platform != "os2emx":
       
  1766                         os.chown(targetpath, u, g)
       
  1767             except EnvironmentError, e:
       
  1768                 raise ExtractError("could not change owner")
       
  1769 
       
  1770     def chmod(self, tarinfo, targetpath):
       
  1771         """Set file permissions of targetpath according to tarinfo.
       
  1772         """
       
  1773         if hasattr(os, 'chmod'):
       
  1774             try:
       
  1775                 os.chmod(targetpath, tarinfo.mode)
       
  1776             except EnvironmentError, e:
       
  1777                 raise ExtractError("could not change mode")
       
  1778 
       
  1779     def utime(self, tarinfo, targetpath):
       
  1780         """Set modification time of targetpath according to tarinfo.
       
  1781         """
       
  1782         if not hasattr(os, 'utime'):
       
  1783             return
       
  1784         if sys.platform == "win32" and tarinfo.isdir():
       
  1785             # According to msdn.microsoft.com, it is an error (EACCES)
       
  1786             # to use utime() on directories.
       
  1787             return
       
  1788         try:
       
  1789             os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
       
  1790         except EnvironmentError, e:
       
  1791             raise ExtractError("could not change modification time")
       
  1792 
       
  1793     #--------------------------------------------------------------------------
       
  1794     def next(self):
       
  1795         """Return the next member of the archive as a TarInfo object, when
       
  1796            TarFile is opened for reading. Return None if there is no more
       
  1797            available.
       
  1798         """
       
  1799         self._check("ra")
       
  1800         if self.firstmember is not None:
       
  1801             m = self.firstmember
       
  1802             self.firstmember = None
       
  1803             return m
       
  1804 
       
  1805         # Read the next block.
       
  1806         self.fileobj.seek(self.offset)
       
  1807         while True:
       
  1808             buf = self.fileobj.read(BLOCKSIZE)
       
  1809             if not buf:
       
  1810                 return None
       
  1811 
       
  1812             try:
       
  1813                 tarinfo = TarInfo.frombuf(buf)
       
  1814 
       
  1815                 # Set the TarInfo object's offset to the current position of the
       
  1816                 # TarFile and set self.offset to the position where the data blocks
       
  1817                 # should begin.
       
  1818                 tarinfo.offset = self.offset
       
  1819                 self.offset += BLOCKSIZE
       
  1820 
       
  1821                 tarinfo = self.proc_member(tarinfo)
       
  1822 
       
  1823             except ValueError, e:
       
  1824                 if self.ignore_zeros:
       
  1825                     self._dbg(2, "0x%X: empty or invalid block: %s" %
       
  1826                               (self.offset, e))
       
  1827                     self.offset += BLOCKSIZE
       
  1828                     continue
       
  1829                 else:
       
  1830                     if self.offset == 0:
       
  1831                         raise ReadError("empty, unreadable or compressed "
       
  1832                                         "file: %s" % e)
       
  1833                     return None
       
  1834             break
       
  1835 
       
  1836         # Some old tar programs represent a directory as a regular
       
  1837         # file with a trailing slash.
       
  1838         if tarinfo.isreg() and tarinfo.name.endswith("/"):
       
  1839             tarinfo.type = DIRTYPE
       
  1840 
       
  1841         # Directory names should have a '/' at the end.
       
  1842         if tarinfo.isdir() and not tarinfo.name.endswith("/"):
       
  1843             tarinfo.name += "/"
       
  1844 
       
  1845         self.members.append(tarinfo)
       
  1846         return tarinfo
       
  1847 
       
  1848     #--------------------------------------------------------------------------
       
  1849     # The following are methods that are called depending on the type of a
       
  1850     # member. The entry point is proc_member() which is called with a TarInfo
       
  1851     # object created from the header block from the current offset. The
       
  1852     # proc_member() method can be overridden in a subclass to add custom
       
  1853     # proc_*() methods. A proc_*() method MUST implement the following
       
  1854     # operations:
       
  1855     # 1. Set tarinfo.offset_data to the position where the data blocks begin,
       
  1856     #    if there is data that follows.
       
  1857     # 2. Set self.offset to the position where the next member's header will
       
  1858     #    begin.
       
  1859     # 3. Return tarinfo or another valid TarInfo object.
       
  1860     def proc_member(self, tarinfo):
       
  1861         """Choose the right processing method for tarinfo depending
       
  1862            on its type and call it.
       
  1863         """
       
  1864         if tarinfo.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
       
  1865             return self.proc_gnulong(tarinfo)
       
  1866         elif tarinfo.type == GNUTYPE_SPARSE:
       
  1867             return self.proc_sparse(tarinfo)
       
  1868         else:
       
  1869             return self.proc_builtin(tarinfo)
       
  1870 
       
  1871     def proc_builtin(self, tarinfo):
       
  1872         """Process a builtin type member or an unknown member
       
  1873            which will be treated as a regular file.
       
  1874         """
       
  1875         tarinfo.offset_data = self.offset
       
  1876         if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
       
  1877             # Skip the following data blocks.
       
  1878             self.offset += self._block(tarinfo.size)
       
  1879         return tarinfo
       
  1880 
       
  1881     def proc_gnulong(self, tarinfo):
       
  1882         """Process the blocks that hold a GNU longname
       
  1883            or longlink member.
       
  1884         """
       
  1885         buf = ""
       
  1886         count = tarinfo.size
       
  1887         while count > 0:
       
  1888             block = self.fileobj.read(BLOCKSIZE)
       
  1889             buf += block
       
  1890             self.offset += BLOCKSIZE
       
  1891             count -= BLOCKSIZE
       
  1892 
       
  1893         # Fetch the next header and process it.
       
  1894         b = self.fileobj.read(BLOCKSIZE)
       
  1895         t = TarInfo.frombuf(b)
       
  1896         t.offset = self.offset
       
  1897         self.offset += BLOCKSIZE
       
  1898         next = self.proc_member(t)
       
  1899 
       
  1900         # Patch the TarInfo object from the next header with
       
  1901         # the longname information.
       
  1902         next.offset = tarinfo.offset
       
  1903         if tarinfo.type == GNUTYPE_LONGNAME:
       
  1904             next.name = nts(buf)
       
  1905         elif tarinfo.type == GNUTYPE_LONGLINK:
       
  1906             next.linkname = nts(buf)
       
  1907 
       
  1908         return next
       
  1909 
       
  1910     def proc_sparse(self, tarinfo):
       
  1911         """Process a GNU sparse header plus extra headers.
       
  1912         """
       
  1913         buf = tarinfo.buf
       
  1914         sp = _ringbuffer()
       
  1915         pos = 386
       
  1916         lastpos = 0L
       
  1917         realpos = 0L
       
  1918         # There are 4 possible sparse structs in the
       
  1919         # first header.
       
  1920         for i in xrange(4):
       
  1921             try:
       
  1922                 offset = nti(buf[pos:pos + 12])
       
  1923                 numbytes = nti(buf[pos + 12:pos + 24])
       
  1924             except ValueError:
       
  1925                 break
       
  1926             if offset > lastpos:
       
  1927                 sp.append(_hole(lastpos, offset - lastpos))
       
  1928             sp.append(_data(offset, numbytes, realpos))
       
  1929             realpos += numbytes
       
  1930             lastpos = offset + numbytes
       
  1931             pos += 24
       
  1932 
       
  1933         isextended = ord(buf[482])
       
  1934         origsize = nti(buf[483:495])
       
  1935 
       
  1936         # If the isextended flag is given,
       
  1937         # there are extra headers to process.
       
  1938         while isextended == 1:
       
  1939             buf = self.fileobj.read(BLOCKSIZE)
       
  1940             self.offset += BLOCKSIZE
       
  1941             pos = 0
       
  1942             for i in xrange(21):
       
  1943                 try:
       
  1944                     offset = nti(buf[pos:pos + 12])
       
  1945                     numbytes = nti(buf[pos + 12:pos + 24])
       
  1946                 except ValueError:
       
  1947                     break
       
  1948                 if offset > lastpos:
       
  1949                     sp.append(_hole(lastpos, offset - lastpos))
       
  1950                 sp.append(_data(offset, numbytes, realpos))
       
  1951                 realpos += numbytes
       
  1952                 lastpos = offset + numbytes
       
  1953                 pos += 24
       
  1954             isextended = ord(buf[504])
       
  1955 
       
  1956         if lastpos < origsize:
       
  1957             sp.append(_hole(lastpos, origsize - lastpos))
       
  1958 
       
  1959         tarinfo.sparse = sp
       
  1960 
       
  1961         tarinfo.offset_data = self.offset
       
  1962         self.offset += self._block(tarinfo.size)
       
  1963         tarinfo.size = origsize
       
  1964 
       
  1965         return tarinfo
       
  1966 
       
  1967     #--------------------------------------------------------------------------
       
  1968     # Little helper methods:
       
  1969 
       
  1970     def _block(self, count):
       
  1971         """Round up a byte count by BLOCKSIZE and return it,
       
  1972            e.g. _block(834) => 1024.
       
  1973         """
       
  1974         blocks, remainder = divmod(count, BLOCKSIZE)
       
  1975         if remainder:
       
  1976             blocks += 1
       
  1977         return blocks * BLOCKSIZE
       
  1978 
       
  1979     def _getmember(self, name, tarinfo=None):
       
  1980         """Find an archive member by name from bottom to top.
       
  1981            If tarinfo is given, it is used as the starting point.
       
  1982         """
       
  1983         # Ensure that all members have been loaded.
       
  1984         members = self.getmembers()
       
  1985 
       
  1986         if tarinfo is None:
       
  1987             end = len(members)
       
  1988         else:
       
  1989             end = members.index(tarinfo)
       
  1990 
       
  1991         for i in xrange(end - 1, -1, -1):
       
  1992             if name == members[i].name:
       
  1993                 return members[i]
       
  1994 
       
  1995     def _load(self):
       
  1996         """Read through the entire archive file and look for readable
       
  1997            members.
       
  1998         """
       
  1999         while True:
       
  2000             tarinfo = self.next()
       
  2001             if tarinfo is None:
       
  2002                 break
       
  2003         self._loaded = True
       
  2004 
       
  2005     def _check(self, mode=None):
       
  2006         """Check if TarFile is still open, and if the operation's mode
       
  2007            corresponds to TarFile's mode.
       
  2008         """
       
  2009         if self.closed:
       
  2010             raise IOError("%s is closed" % self.__class__.__name__)
       
  2011         if mode is not None and self._mode not in mode:
       
  2012             raise IOError("bad operation for mode %r" % self._mode)
       
  2013 
       
  2014     def __iter__(self):
       
  2015         """Provide an iterator object.
       
  2016         """
       
  2017         if self._loaded:
       
  2018             return iter(self.members)
       
  2019         else:
       
  2020             return TarIter(self)
       
  2021 
       
  2022     def _dbg(self, level, msg):
       
  2023         """Write debugging output to sys.stderr.
       
  2024         """
       
  2025         if level <= self.debug:
       
  2026             print >> sys.stderr, msg
       
  2027 # class TarFile
       
  2028 
       
  2029 class TarIter:
       
  2030     """Iterator Class.
       
  2031 
       
  2032        for tarinfo in TarFile(...):
       
  2033            suite...
       
  2034     """
       
  2035 
       
  2036     def __init__(self, tarfile):
       
  2037         """Construct a TarIter object.
       
  2038         """
       
  2039         self.tarfile = tarfile
       
  2040         self.index = 0
       
  2041     def __iter__(self):
       
  2042         """Return iterator object.
       
  2043         """
       
  2044         return self
       
  2045     def next(self):
       
  2046         """Return the next item using TarFile's next() method.
       
  2047            When all members have been read, set TarFile as _loaded.
       
  2048         """
       
  2049         # Fix for SF #1100429: Under rare circumstances it can
       
  2050         # happen that getmembers() is called during iteration,
       
  2051         # which will cause TarIter to stop prematurely.
       
  2052         if not self.tarfile._loaded:
       
  2053             tarinfo = self.tarfile.next()
       
  2054             if not tarinfo:
       
  2055                 self.tarfile._loaded = True
       
  2056                 raise StopIteration
       
  2057         else:
       
  2058             try:
       
  2059                 tarinfo = self.tarfile.members[self.index]
       
  2060             except IndexError:
       
  2061                 raise StopIteration
       
  2062         self.index += 1
       
  2063         return tarinfo
       
  2064 
       
  2065 # Helper classes for sparse file support
       
  2066 class _section:
       
  2067     """Base class for _data and _hole.
       
  2068     """
       
  2069     def __init__(self, offset, size):
       
  2070         self.offset = offset
       
  2071         self.size = size
       
  2072     def __contains__(self, offset):
       
  2073         return self.offset <= offset < self.offset + self.size
       
  2074 
       
  2075 class _data(_section):
       
  2076     """Represent a data section in a sparse file.
       
  2077     """
       
  2078     def __init__(self, offset, size, realpos):
       
  2079         _section.__init__(self, offset, size)
       
  2080         self.realpos = realpos
       
  2081 
       
  2082 class _hole(_section):
       
  2083     """Represent a hole section in a sparse file.
       
  2084     """
       
  2085     pass
       
  2086 
       
  2087 class _ringbuffer(list):
       
  2088     """Ringbuffer class which increases performance
       
  2089        over a regular list.
       
  2090     """
       
  2091     def __init__(self):
       
  2092         self.idx = 0
       
  2093     def find(self, offset):
       
  2094         idx = self.idx
       
  2095         while True:
       
  2096             item = self[idx]
       
  2097             if offset in item:
       
  2098                 break
       
  2099             idx += 1
       
  2100             if idx == len(self):
       
  2101                 idx = 0
       
  2102             if idx == self.idx:
       
  2103                 # End of File
       
  2104                 return None
       
  2105         self.idx = idx
       
  2106         return item
       
  2107 
       
  2108 #---------------------------------------------
       
  2109 # zipfile compatible TarFile class
       
  2110 #---------------------------------------------
       
# Values accepted for the compression argument of TarFileCompat.
# Each one equals the zipfile constant named in the comment beside it.
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
       
  2113 class TarFileCompat:
       
  2114     """TarFile class compatible with standard module zipfile's
       
  2115        ZipFile class.
       
  2116     """
       
  2117     def __init__(self, file, mode="r", compression=TAR_PLAIN):
       
  2118         if compression == TAR_PLAIN:
       
  2119             self.tarfile = TarFile.taropen(file, mode)
       
  2120         elif compression == TAR_GZIPPED:
       
  2121             self.tarfile = TarFile.gzopen(file, mode)
       
  2122         else:
       
  2123             raise ValueError("unknown compression constant")
       
  2124         if mode[0:1] == "r":
       
  2125             members = self.tarfile.getmembers()
       
  2126             for m in members:
       
  2127                 m.filename = m.name
       
  2128                 m.file_size = m.size
       
  2129                 m.date_time = time.gmtime(m.mtime)[:6]
       
  2130     def namelist(self):
       
  2131         return map(lambda m: m.name, self.infolist())
       
  2132     def infolist(self):
       
  2133         return filter(lambda m: m.type in REGULAR_TYPES,
       
  2134                       self.tarfile.getmembers())
       
  2135     def printdir(self):
       
  2136         self.tarfile.list()
       
  2137     def testzip(self):
       
  2138         return
       
  2139     def getinfo(self, name):
       
  2140         return self.tarfile.getmember(name)
       
  2141     def read(self, name):
       
  2142         return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
       
  2143     def write(self, filename, arcname=None, compress_type=None):
       
  2144         self.tarfile.add(filename, arcname)
       
  2145     def writestr(self, zinfo, bytes):
       
  2146         try:
       
  2147             from cStringIO import StringIO
       
  2148         except ImportError:
       
  2149             from StringIO import StringIO
       
  2150         import calendar
       
  2151         zinfo.name = zinfo.filename
       
  2152         zinfo.size = zinfo.file_size
       
  2153         zinfo.mtime = calendar.timegm(zinfo.date_time)
       
  2154         self.tarfile.addfile(zinfo, StringIO(bytes))
       
  2155     def close(self):
       
  2156         self.tarfile.close()
       
  2157 #class TarFileCompat
       
  2158 
       
  2159 #--------------------
       
  2160 # exported functions
       
  2161 #--------------------
       
  2162 def is_tarfile(name):
       
  2163     """Return True if name points to a tar archive that we
       
  2164        are able to handle, else return False.
       
  2165     """
       
  2166     try:
       
  2167         t = open(name)
       
  2168         t.close()
       
  2169         return True
       
  2170     except TarError:
       
  2171         return False
       
  2172 
       
# Make TarFile.open available as the module-level function open().
# This rebinds the name "open" in this module's namespace, so the
# open() call in is_tarfile() resolves to TarFile.open, not the builtin.
open = TarFile.open