releasing/blocks/framework/src/Blocks/arfile.py
changeset 632 934f9131337b
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/releasing/blocks/framework/src/Blocks/arfile.py	Thu Sep 02 15:02:14 2010 +0800
@@ -0,0 +1,383 @@
+#
+# Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+# All rights reserved.
+# This component and the accompanying materials are made available
+# under the terms of "Eclipse Public License v1.0"
+# which accompanies this distribution, and is available
+# at the URL "http://www.eclipse.org/legal/epl-v10.html".
+#
+# Initial Contributors:
+# Nokia Corporation - initial contribution.
+#
+# Contributors:
+#
+# Description:
+# Ar-file manager
+#
+
+import os
+import struct
+import stat
+import time
+
+# Magic string that must open every ar-archive
+_GLOBAL_HEADER = "!<arch>\n"
+# Fixed-width member header. The fields are packed in this order by
+# ArInfo.getHeader(): name, mtime, uid, gid, mode, size, end-of-header magic
+_FILE_HEADER_STRING = "16s 12s 6s 6s 8s 10s 2s"
+_FILE_HEADER_STRUCT = struct.Struct(_FILE_HEADER_STRING)
+# End-of-header magic: a backquote (octal 140) followed by a newline (octal 012)
+_FILE_MAGIC = "\140\012"
+
+class ArError(Exception):
+    ''' Ar-archive error '''
+    def __init__(self, error):
+        Exception.__init__(self, "Ar-archive error: %s" % error)
+        self.error = error
+
+class ArInfo(object):
+    ''' Ar-file information '''
+    __slots__  = "name", "size", "mtime", "uid", "gid", "mode", "offset"
+
+    def __init__(self, *args, **kwargs):
+        '''
+        @param name: Filename
+        @type name:  String
+        @param path: Path of file, use to set values
+        @type path:  String
+
+        If both name and path are given, the name overrides the name from path.
+
+        Use case 1, new ArInfo::
+            a = ArInfo()                                  # empty name
+            a = ArInfo("myFile")
+            a = ArInfo(name="myFile")
+
+        Use case 2, read from file::
+            a = ArInfo(path="/path/to/file")
+            a = ArInfo("myFile", path="/path/to/file")    # name is myFile rather than file
+            a = ArInfo("myFile", "/path/to/file")         # name is myFile rather than file
+
+        use case 3, set all values from list, as read from raw ar header::
+            a = ArInfo(list)
+        '''
+        path = None
+        self.name = ""          # file name
+        self.size = 0           # size in bytes
+        self.mtime = int(time.time())          # modification time
+        self.uid = 0            # user ID
+        self.gid = 0            # group ID
+        self.mode = 100644      # permissions
+        self.offset = 0         # offset in archive
+
+        if len(args) == 1:
+            if isinstance(args[0], basestring): # name only
+                self.name = args[0]
+            elif isinstance(args[0], list):
+                assert len(args[0]) == 8 # from raw headers, ignore magic
+                (self.name, self.mtime, self.uid,
+                self.gid, self.mode, self.size,
+                ignoreMagic, self.offset) = args[0]
+                self.size = int(self.size)
+        elif len(args) == 2:
+            self.name = args[0]
+            path = args[1]
+
+        self.name = kwargs.get("name", self.name)
+        path = kwargs.get("path", path)
+
+        if path:
+            try:
+                statinfo = os.stat(path)
+            except EnvironmentError, ex:
+                raise ArError("File '%s' not found" % path)
+
+            self.name = self.name or os.path.basename(path) # + "/" # trailing slash is GNU way to allow spaces in name
+            self.size = str(statinfo.st_size)
+            self.mtime = str(statinfo.st_mtime)
+            self.uid = str(statinfo.st_uid)
+            self.gid = str(statinfo.st_gid)
+            self.mode = str(oct(stat.S_IMODE(statinfo.st_mode)))
+
+    def getHeader(self):
+        return _FILE_HEADER_STRUCT.pack(self.name,# + "/", # trailing slash is GNU way to allow spaces in name
+                                        str(self.mtime),
+                                        str(self.uid),
+                                        str(self.gid),
+                                        str(self.mode),
+                                        str(self.size),
+                                        _FILE_MAGIC).replace("\x00", " ")
+
+    def __str__(self):
+        return ", ".join(["%s: %s" % (a, str(getattr(self, a))) for a in self.__slots__])
+
+class ArFile(object):
+    '''
+    Creating a new archive::
+
+        a = ArFile("new.ar", "w")
+        a.add("/some/file")
+        a.close()
+
+    Appending to an existing archive::
+
+        a = ArFile("old.ar", "a")
+        i = ArInfo("myfile.tar.gz")
+        a.addfile(i)
+        t = TarFile.open(mode="w|gz", fileobj=a)
+        t.close()
+        a.close()
+
+    Reading a member file directly (no tempfiles)::
+        a = ArFile("old.ar")
+        a.extractfile("my_member.file")
+        a.read()
+
+    '''
+
+    MODES = ("r", "w", "a")
+
+    def __init__(self, name=None, mode="r", fileobj=None):
+        if mode not in ArFile.MODES:
+            raise ArError("Mode is %s. Mode must be one of '%s'." % (mode, ", ".join(ArFile.MODES)))
+        self.files = {}
+        self.archive = None
+        self.mode = mode
+        self.lastOpen = None            # archive size before last addition
+        self.startSize = None
+        self.filesToRead = []           # names of files to read()
+        self.opened = False
+        if name or fileobj:
+            self.open(name, fileobj)
+
+    def open(self, name=None, fileobj=None):
+        if fileobj and hasattr(fileobj, "read"):
+            self.archive = fileobj
+        else:
+            try:
+                self.archive = open(name, {"r": "rb", "w": "w+b", "a": "r+b"}[self.mode])
+                self.opened = True
+            except IOError:
+                raise ArError("File '%s' could not be opened" % name)
+        if self.mode == "w":
+            self.archive.write(_GLOBAL_HEADER)
+        else:
+            self._readHeaders()
+
+    def close(self):
+        if self.filesToRead:
+            self.filesToRead = None
+        self._endFile()
+        if self.opened:
+            self.archive.close()
+
+    def add(self, name, arcname=None):
+        info = ArInfo(arcname, name)
+        with open(name, "rb") as f:
+            self.addfile(info, f)
+
+    def remove(self, name):
+        fileheader = self.files.get(name)
+        if not fileheader:
+            raise ArError("File '%s' not found from archive" % name)
+        lastFileOffset = max(self.files[n].offset for n in self.files.iterkeys())
+        if fileheader.offset == lastFileOffset: # last file
+            self.archive.truncate(fileheader.offset - _FILE_HEADER_STRUCT.size)
+        else:
+            archiveWrite = open(self.archive.name, "r+b")
+            archiveWrite.seek(fileheader.offset - _FILE_HEADER_STRUCT.size)
+            nextFileOffset = fileheader.offset + fileheader.size + 1
+            self.archive.seek(nextFileOffset)
+            self._copyFileData(self.archive, archiveWrite)
+            archiveWrite.truncate()
+            archiveWrite.close()
+
+        del self.files[name]
+
+    def addfile(self, arinfo, fileobj=None):
+        if self.mode == "r":
+            raise ArError("Cannot add files in read mode")
+        if not fileobj and not hasattr(self.archive, "seek"):
+            raise ArError("Direct writing requires a target with seek()")
+        if len(arinfo.name) > 16:
+            raise ArError("Long filenames are not supported")
+        if arinfo.name in self.files:
+            raise ArError("Cannot add file '%s' because it already exists" % arinfo.name)
+        self._endFile()
+
+        self.archive.seek(0, os.SEEK_END)
+        here = self.archive.tell()
+        self.archive.write(arinfo.getHeader())
+        dataOffset = self.archive.tell()
+        if fileobj:
+            self._copyFileData(fileobj, self.archive, arinfo.size)
+            if int(arinfo.size) % 2 == 1:
+                self.archive.write("\n")
+        else:
+            # allow user to write() directly, just leave a signal for
+            # _endFile to clean up afterwards
+            self.lastOpen = here
+
+        arinfo.offset = dataOffset
+        self.files[arinfo.name] = arinfo
+
+    def write(self, str):
+        self.archive.write(str)
+
+    def _endFile(self):
+        '''
+        Overwrite correct size to last header.
+
+        Does something only when addfile() was called without a file object,
+        which leaves the position of the new header in I{lastOpen}. The
+        header is read back, its size is set to the amount of data between
+        the header and the current position, and it is written again.
+        '''
+        if self.lastOpen:
+            # presumably the caller has only appended with write(), so the
+            # current position is the end of the member data
+            end = self.archive.tell()
+            self.archive.seek(self.lastOpen)
+            hdata = [field.strip() for field in _FILE_HEADER_STRUCT.unpack(self.archive.read(_FILE_HEADER_STRUCT.size))]
+            # position after reading the header is where the data starts
+            fileheader = ArInfo(list(hdata) + [self.archive.tell()])
+            fileheader.size = end - fileheader.offset
+            self.archive.seek(self.lastOpen)
+            self.archive.write(fileheader.getHeader())
+            self.archive.seek(end)
+            # pad odd sized data with a newline
+            if int(fileheader.size) % 2 == 1:
+                self.archive.write("\n")
+            self.lastOpen = None
+            # replaces the entry stored by addfile() with the corrected one
+            self.files[fileheader.name] = fileheader
+
+    def _readHeaders(self):
+        '''
+        @TODO: use name record file
+        '''
+        if self.archive.read(len(_GLOBAL_HEADER)) != _GLOBAL_HEADER:
+            raise ArError("File is not an ar-archive: global header not matching")
+
+        headerdata = self.archive.read(_FILE_HEADER_STRUCT.size)
+        if not headerdata:
+            raise ArError("File corrupted: file header not found")
+        while headerdata:
+            hdata = [field.strip() for field in _FILE_HEADER_STRUCT.unpack(headerdata)]
+            fileheader = ArInfo(hdata + [self.archive.tell()])
+            if fileheader.name.startswith("/"):
+                raise ArError("Long filenames are not supported")
+            self.files[fileheader.name] = fileheader
+
+            skip = int(fileheader.size)
+            if skip % 2 == 1:
+                skip += 1
+            self.archive.seek(skip, os.SEEK_CUR)
+            headerdata = self.archive.read(_FILE_HEADER_STRUCT.size)
+
+    def getNames(self):
+        ''' Returns list of names in archive '''
+        return self.files.keys()
+
+    def extract(self, filename, path_or_fileobj=""):
+        fileheader = self.files.get(filename)
+        if not fileheader:
+            raise ArError("File '%s' not found from archive" % filename)
+        self._writeFile(fileheader, path_or_fileobj)
+
+    def extractall(self, path=""):
+        '''
+        Extract all members to directory I{path}
+        @param path: Directory
+        @type path: String
+        '''
+        assert os.path.isdir(path), "%s is not a directory" % path
+        for header in self.files.itervalues():
+            self._writeFile(header, path)
+
+    def _writeFile(self, fileheader, path_or_fileobj):
+        self.archive.seek(fileheader.offset)
+        if isinstance(path_or_fileobj, basestring):
+            with open(os.path.join(path_or_fileobj, fileheader.name), "wb") as dstFile:
+                self._copyFileData(self.archive, dstFile, fileheader.size)
+                dstFile.close()
+        else:
+            self._copyFileData(self.archive, path_or_fileobj, fileheader.size)
+
+    @staticmethod
+    def _copyFileData(src, dst, size=None, blocksize=32*1024):
+        ''' Copy data from source file to destination file '''
+        bytesread = 0
+        while size is None or bytesread < size:
+            if size and (bytesread + blocksize) >= size:
+                blocksize = size - bytesread
+            buf = src.read(blocksize)
+            bytesread += blocksize
+            if not buf:
+                break
+            dst.write(buf)
+
+    def extractfile(self, *filenames):
+        '''
+        Read member file(s) as from a file-like object. Mimics tarfile's
+        extractfile() by returning the handle, but in fact just returns self.
+
+        @param filenames: Member files to read
+        @type filenames: String
+        @return file-like object for reading
+        '''
+        try:
+            self.filesToRead = [self.files[f] for f in filenames]
+        except IndexError:
+            raise ArError("Cannot extractfile, no such archive member(s): '%s'" % ', '.join(filenames))
+        self.filesToRead.sort(cmp=lambda x, y: cmp(x.offset, y.offset))
+        self._endFile()
+        self.seek(0)
+        return self
+
+    def seek(self, offset=0, whence=0):
+        if not self.filesToRead:
+            raise ArError("seek() supported only when reading files, use extractfile()")
+        if whence == 0:
+            if offset == 0:
+                self.archive.seek(self.filesToRead[0].offset)
+            else:
+                i = 0
+                while offset > self.filesToRead[i].size:
+                    if i+1 > len(self.filesToRead):
+                        break
+                    else:
+                        offset = offset - self.filesToRead[i].size
+                        i = i + 1
+                self.archive.seek(self.filesToRead[i].offset + offset)
+        elif whence == 1:
+            self.seek(self.tell() + offset, 0)
+        elif whence == 2:
+            self.seek(sum([member.size for member in self.filesToRead]) + offset, 0)
+        else:
+            raise ArError("seek() got invalid value for whence")
+
+    def _tellCurrentMember(self):
+        if not self.filesToRead:
+            raise ArError("No files to read. Use extractfile()")
+        i = 0
+        while i+1 < len(self.filesToRead):
+            if self.archive.tell() < self.filesToRead[i].offset:
+                # Position is outside data sections
+                raise ArError("Internal error, invalid position in archive")
+            elif self.archive.tell() < self.filesToRead[i].offset + self.filesToRead[i].size:
+                break
+            else:
+                i = i + 1
+        return i
+
+    def tell(self):
+        i = self._tellCurrentMember()
+        return sum([member.size for member in self.filesToRead[:i]]) + self.archive.tell() - self.filesToRead[i].offset
+
+    def read(self, size=32*1024):
+        '''
+        Read member files sequentially in I{size} byte chunks. This can be done
+        after calling extractfile()
+
+        @param size: Bytes to read
+        @type size: Integer
+        '''
+        i = self._tellCurrentMember()
+        end = self.filesToRead[i].offset + self.filesToRead[i].size
+        # End of a member file
+        if self.archive.tell() + size >= end:
+            remainder = end - self.archive.tell()
+            leftOver = size - remainder
+            buf = self.archive.read(remainder)
+            if i+1 < len(self.filesToRead) and leftOver:
+                self.archive.seek(self.filesToRead[i+1].offset)
+                buf += self.read(leftOver)
+        # Middle of a file
+        else:
+            buf = self.archive.read(size)
+        return buf