duplicity-team team mailing list archive
duplicity-team team » Mailing list archive » Message #00716
[Merge] lp:~mterry/duplicity/tarfile into lp:duplicity
Michael Terry has proposed merging lp:~mterry/duplicity/tarfile into lp:duplicity.
Requested reviews:
duplicity-team (duplicity-team)
Related bugs:
Bug #485219 in Duplicity: "Fixed fields are not fixed, leading to buffer overflows and tar explosions."
https://bugs.launchpad.net/duplicity/+bug/485219
For more details, see:
https://code.launchpad.net/~mterry/duplicity/tarfile/+merge/72422
--
https://code.launchpad.net/~mterry/duplicity/tarfile/+merge/72422
Your team duplicity-team is requested to review the proposed merge of lp:~mterry/duplicity/tarfile into lp:duplicity.
=== modified file 'duplicity/diffdir.py'
--- duplicity/diffdir.py 2011-06-13 15:40:19 +0000
+++ duplicity/diffdir.py 2011-08-23 18:28:26 +0000
@@ -29,6 +29,7 @@
import cStringIO, types
from duplicity import statistics
+from duplicity import util
from duplicity.path import * #@UnusedWildImport
from duplicity.lazy import * #@UnusedWildImport
@@ -181,7 +182,7 @@
"""
collated = collate2iters(new_iter, sig_iter)
if sig_fileobj:
- sigTarFile = tarfile.TarFile("arbitrary", "w", sig_fileobj)
+ sigTarFile = util.make_tarfile("w", sig_fileobj)
else:
sigTarFile = None
for new_path, sig_path in collated:
@@ -224,16 +225,17 @@
"""
Convert signature tar file object open for reading into path iter
"""
- tf = tarfile.TarFile("Arbitrary Name", "r", sigtarobj)
- tf.debug = 2
+ tf = util.make_tarfile("r", sigtarobj)
+ tf.debug = 1
for tarinfo in tf:
+ tiname = util.get_tarinfo_name(tarinfo)
for prefix in ["signature/", "snapshot/", "deleted/"]:
- if tarinfo.name.startswith(prefix):
- # strip prefix and from name and set it to difftype
- name, difftype = tarinfo.name[len(prefix):], prefix[:-1]
+ if tiname.startswith(prefix):
+ # strip prefix and '/' from name and set it to difftype
+ name, difftype = tiname[len(prefix):], prefix[:-1]
break
else:
- raise DiffDirException("Bad tarinfo name %s" % (tarinfo.name,))
+ raise DiffDirException("Bad tarinfo name %s" % (tiname,))
index = tuple(name.split("/"))
if not index[-1]:
@@ -464,16 +466,12 @@
self.remember_value = None # holds index of next block
self.remember_block = None # holds block of next block
- # We need to instantiate a dummy TarFile just to get access to
- # some of the functions like _get_full_headers.
- self.tf = tarfile.TarFromIterator(None)
-
def tarinfo2tarblock(self, index, tarinfo, file_data = ""):
"""
Make tarblock out of tarinfo and file data
"""
tarinfo.size = len(file_data)
- headers = self.tf._get_full_headers(tarinfo)
+ headers = tarinfo.tobuf()
blocks, remainder = divmod(tarinfo.size, tarfile.BLOCKSIZE) #@UnusedVariable
if remainder > 0:
filler_data = "\0" * (tarfile.BLOCKSIZE - remainder)
=== modified file 'duplicity/dup_temp.py'
--- duplicity/dup_temp.py 2011-08-18 19:17:55 +0000
+++ duplicity/dup_temp.py 2011-08-23 18:28:26 +0000
@@ -207,6 +207,18 @@
"""
return self.fileobj.read(length)
+ def tell(self):
+ """
+ Returns current location of fileobj
+ """
+ return self.fileobj.tell()
+
+ def seek(self, offset):
+ """
+ Seeks to a location of fileobj
+ """
+ return self.fileobj.seek(offset)
+
def close(self):
"""
Close fileobj, running hooks right afterwards
=== modified file 'duplicity/gpg.py'
--- duplicity/gpg.py 2011-07-16 18:37:47 +0000
+++ duplicity/gpg.py 2011-08-23 18:28:26 +0000
@@ -96,6 +96,7 @@
self.logger_fp = tempfile.TemporaryFile()
self.stderr_fp = tempfile.TemporaryFile()
self.name = encrypt_path
+ self.byte_count = 0
# Start GPG process - copied from GnuPGInterface docstring.
gnupg = GnuPGInterface.GnuPG()
@@ -157,6 +158,8 @@
def read(self, length = -1):
try:
res = self.gpg_output.read(length)
+ if res is not None:
+ self.byte_count += len(res)
except Exception:
self.gpg_failed()
return res
@@ -164,10 +167,21 @@
def write(self, buf):
try:
res = self.gpg_input.write(buf)
+ if res is not None:
+ self.byte_count += len(res)
except Exception:
self.gpg_failed()
return res
+ def tell(self):
+ return self.byte_count
+
+ def seek(self, offset):
+ assert not self.encrypt
+ assert offset >= self.byte_count, "%d < %d" % (offset, self.byte_count)
+ if offset > self.byte_count:
+ self.read(offset - self.byte_count)
+
def gpg_failed(self):
msg = "GPG Failed, see log below:\n"
msg += "===== Begin GnuPG log =====\n"
=== modified file 'duplicity/patchdir.py'
--- duplicity/patchdir.py 2011-03-06 12:30:14 +0000
+++ duplicity/patchdir.py 2011-08-23 18:28:26 +0000
@@ -141,8 +141,9 @@
"""Return (index, difftype, multivol) pair from tarinfo object"""
for prefix in ["snapshot/", "diff/", "deleted/",
"multivol_diff/", "multivol_snapshot/"]:
- if tarinfo.name.startswith( prefix ):
- name = tarinfo.name[len( prefix ):] # strip prefix
+ tiname = util.get_tarinfo_name( tarinfo )
+ if tiname.startswith( prefix ):
+ name = tiname[len( prefix ):] # strip prefix
if prefix.startswith( "multivol" ):
if prefix == "multivol_diff/":
difftype = "diff"
@@ -150,28 +151,28 @@
difftype = "snapshot"
multivol = 1
name, num_subs = \
- re.subn( "(?s)^multivol_(diff|snapshot)/(.*)/[0-9]+$",
- "\\2", tarinfo.name )
+ re.subn( "(?s)^multivol_(diff|snapshot)/?(.*)/[0-9]+$",
+ "\\2", tiname )
if num_subs != 1:
raise PatchDirException( "Unrecognized diff entry %s" %
- ( tarinfo.name, ) )
+ ( tiname, ) )
else:
difftype = prefix[:-1] # strip trailing /
- name = tarinfo.name[len( prefix ):]
+ name = tiname[len( prefix ):]
if name.endswith( "/" ):
name = name[:-1] # strip trailing /'s
multivol = 0
break
else:
raise PatchDirException( "Unrecognized diff entry %s" %
- ( tarinfo.name, ) )
+ ( tiname, ) )
if name == "." or name == "":
index = ()
else:
index = tuple( name.split( "/" ) )
if '..' in index:
raise PatchDirException( "Tar entry %s contains '..'. Security "
- "violation" % ( tarinfo.name, ) )
+ "violation" % ( tiname, ) )
return ( index, difftype, multivol )
@@ -320,7 +321,7 @@
if self.current_fp:
assert not self.current_fp.close()
self.current_fp = self.fileobj_iter.next()
- self.tarfile = tarfile.TarFile( "arbitrary", "r", self.current_fp )
+ self.tarfile = util.make_tarfile("r", self.current_fp)
self.tar_iter = iter( self.tarfile )
def next( self ):
=== modified file 'duplicity/path.py'
--- duplicity/path.py 2011-08-18 20:08:56 +0000
+++ duplicity/path.py 2011-08-23 18:28:26 +0000
@@ -26,12 +26,12 @@
"""
-import stat, errno, socket, time, re, gzip
+import stat, errno, socket, time, re, gzip, pwd, grp
+from duplicity import tarfile
from duplicity import file_naming
from duplicity import globals
from duplicity import gpg
-from duplicity import tarfile
from duplicity import util
from duplicity import librsync
from duplicity import log #@UnusedImport
@@ -206,13 +206,13 @@
try:
if globals.numeric_owner:
raise KeyError
- self.stat.st_uid = tarfile.uname2uid(tarinfo.uname)
+ self.stat.st_uid = pwd.getpwnam(tarinfo.uname)[2]
except KeyError:
self.stat.st_uid = tarinfo.uid
try:
if globals.numeric_owner:
raise KeyError
- self.stat.st_gid = tarfile.gname2gid(tarinfo.gname)
+ self.stat.st_gid = grp.getgrnam(tarinfo.gname)[2]
except KeyError:
self.stat.st_gid = tarinfo.gid
@@ -284,13 +284,13 @@
ti.mtime = int(self.stat.st_mtime)
try:
- ti.uname = tarfile.uid2uname(ti.uid)
+ ti.uname = pwd.getpwuid(ti.uid)[0]
except KeyError:
- pass
+ ti.uname = ''
try:
- ti.gname = tarfile.gid2gname(ti.gid)
+ ti.gname = grp.getgrgid(ti.gid)[0]
except KeyError:
- pass
+ ti.gname = ''
if ti.type in (tarfile.CHRTYPE, tarfile.BLKTYPE):
if hasattr(os, "major") and hasattr(os, "minor"):
=== modified file 'duplicity/tarfile.py'
--- duplicity/tarfile.py 2011-06-23 15:47:44 +0000
+++ duplicity/tarfile.py 2011-08-23 18:28:26 +0000
@@ -1,17 +1,9 @@
-#!/usr/bin/env python
-# -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*-
+#! /usr/bin/python2.7
+# -*- coding: iso-8859-1 -*-
#-------------------------------------------------------------------
# tarfile.py
-#
-# Module for reading and writing .tar and tar.gz files.
-#
-# Needs at least Python version 2.2.
-#
-# Please consult the html documentation in this distribution
-# for further details on how to use tarfile.
-#
#-------------------------------------------------------------------
-# Copyright (C) 2002 Lars Gustabel <lars@xxxxxxxxxxxx>
+# Copyright (C) 2002 Lars Gustäbel <lars@xxxxxxxxxxxx>
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person
@@ -38,98 +30,105 @@
"""Read from and write to tar format archives.
"""
-__version__ = "$Revision: 1.8 $"
-# $Source: /sources/duplicity/duplicity/duplicity/tarfile.py,v $
+__version__ = "$Revision: 85213 $"
+# $Source$
-version = "0.4.9"
-__author__ = "Lars Gustabel (lars@xxxxxxxxxxxx)"
-__date__ = "$Date: 2008/11/16 18:48:15 $"
-__cvsid__ = "$Id: tarfile.py,v 1.8 2008/11/16 18:48:15 loafman Exp $"
-__credits__ = "Gustavo Niemeyer for his support, " \
- "Detlef Lannert for some early contributions"
+version = "0.9.0"
+__author__ = "Lars Gustäbel (lars@xxxxxxxxxxxx)"
+__date__ = "$Date: 2010-10-04 10:37:53 -0500 (Mon, 04 Oct 2010) $"
+__cvsid__ = "$Id: tarfile.py 85213 2010-10-04 15:37:53Z lars.gustaebel $"
+__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
#---------
# Imports
#---------
import sys
import os
-import __builtin__
import shutil
import stat
import errno
import time
+import struct
+import copy
+import re
+import operator
try:
import grp, pwd
except ImportError:
grp = pwd = None
-# These are used later to cache user and group names and ids
-gname_dict = uname_dict = uid_dict = gid_dict = None
-
-# We won't need this anymore in Python 2.3
-#
-# We import the _tarfile extension, that contains
-# some useful functions to handle devices and symlinks.
-# We inject them into os module, as if we were under 2.3.
-#
-try:
- import _tarfile
- if _tarfile.mknod is None:
- _tarfile = None
-except ImportError:
- _tarfile = None
-if _tarfile and not hasattr(os, "mknod"):
- os.mknod = _tarfile.mknod
-if _tarfile and not hasattr(os, "major"):
- os.major = _tarfile.major
-if _tarfile and not hasattr(os, "minor"):
- os.minor = _tarfile.minor
-if _tarfile and not hasattr(os, "makedev"):
- os.makedev = _tarfile.makedev
-if _tarfile and not hasattr(os, "lchown"):
- os.lchown = _tarfile.lchown
-
-# XXX remove for release (2.3)
-if sys.version_info[:2] < (2,3):
- True = 1
- False = 0
-
-#---------------------------------------------------------
-# GNUtar constants
-#---------------------------------------------------------
-BLOCKSIZE = 512 # length of processing blocks
+
+# from tarfile import *
+__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
+
+#---------------------------------------------------------
+# tar constants
+#---------------------------------------------------------
+NUL = "\0" # the null character
+BLOCKSIZE = 512 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20 # length of records
-MAGIC = "ustar" # magic tar string
-VERSION = "00" # version number
-
-LENGTH_NAME = 100 # maximal length of a filename
-LENGTH_LINK = 100 # maximal length of a linkname
-
-REGTYPE = "0" # regular file
+GNU_MAGIC = "ustar \0" # magic gnu tar string
+POSIX_MAGIC = "ustar\x0000" # magic posix tar string
+
+LENGTH_NAME = 100 # maximum length of a filename
+LENGTH_LINK = 100 # maximum length of a linkname
+LENGTH_PREFIX = 155 # maximum length of the prefix field
+
+REGTYPE = "0" # regular file
AREGTYPE = "\0" # regular file
-LNKTYPE = "1" # link (inside tarfile)
-SYMTYPE = "2" # symbolic link
-CHRTYPE = "3" # character special device
-BLKTYPE = "4" # block special device
-DIRTYPE = "5" # directory
+LNKTYPE = "1" # link (inside tarfile)
+SYMTYPE = "2" # symbolic link
+CHRTYPE = "3" # character special device
+BLKTYPE = "4" # block special device
+DIRTYPE = "5" # directory
FIFOTYPE = "6" # fifo special device
CONTTYPE = "7" # contiguous file
-GNUTYPE_LONGNAME = "L" # GNU tar extension for longnames
-GNUTYPE_LONGLINK = "K" # GNU tar extension for longlink
-GNUTYPE_SPARSE = "S" # GNU tar extension for sparse file
+GNUTYPE_LONGNAME = "L" # GNU tar longname
+GNUTYPE_LONGLINK = "K" # GNU tar longlink
+GNUTYPE_SPARSE = "S" # GNU tar sparse file
+
+XHDTYPE = "x" # POSIX.1-2001 extended header
+XGLTYPE = "g" # POSIX.1-2001 global header
+SOLARIS_XHDTYPE = "X" # Solaris extended header
+
+USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
+GNU_FORMAT = 1 # GNU tar format
+PAX_FORMAT = 2 # POSIX.1-2001 (pax) format
+DEFAULT_FORMAT = GNU_FORMAT
#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
-SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, # file types that tarfile
- SYMTYPE, DIRTYPE, FIFOTYPE, # can cope with.
- CONTTYPE, GNUTYPE_LONGNAME,
- GNUTYPE_LONGLINK, GNUTYPE_SPARSE,
- CHRTYPE, BLKTYPE)
-
-REGULAR_TYPES = (REGTYPE, AREGTYPE, # file types that somehow
- CONTTYPE, GNUTYPE_SPARSE) # represent regular files
+# File types that tarfile supports:
+SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
+ SYMTYPE, DIRTYPE, FIFOTYPE,
+ CONTTYPE, CHRTYPE, BLKTYPE,
+ GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
+ GNUTYPE_SPARSE)
+
+# File types that will be treated as a regular file.
+REGULAR_TYPES = (REGTYPE, AREGTYPE,
+ CONTTYPE, GNUTYPE_SPARSE)
+
+# File types that are part of the GNU tar format.
+GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
+ GNUTYPE_SPARSE)
+
+# Fields from a pax header that override a TarInfo attribute.
+PAX_FIELDS = ("path", "linkpath", "size", "mtime",
+ "uid", "gid", "uname", "gname")
+
+# Fields in a pax header that are numbers, all other fields
+# are treated as strings.
+PAX_NUMBER_FIELDS = {
+ "atime": float,
+ "ctime": float,
+ "mtime": float,
+ "uid": int,
+ "gid": int,
+ "size": int
+}
#---------------------------------------------------------
# Bits used in the mode field, values in octal.
@@ -145,34 +144,117 @@
TSGID = 02000 # set GID on execution
TSVTX = 01000 # reserved
-TUREAD = 00400 # read by owner
-TUWRITE = 00200 # write by owner
-TUEXEC = 00100 # execute/search by owner
-TGREAD = 00040 # read by group
-TGWRITE = 00020 # write by group
-TGEXEC = 00010 # execute/search by group
-TOREAD = 00004 # read by other
-TOWRITE = 00002 # write by other
-TOEXEC = 00001 # execute/search by other
+TUREAD = 0400 # read by owner
+TUWRITE = 0200 # write by owner
+TUEXEC = 0100 # execute/search by owner
+TGREAD = 0040 # read by group
+TGWRITE = 0020 # write by group
+TGEXEC = 0010 # execute/search by group
+TOREAD = 0004 # read by other
+TOWRITE = 0002 # write by other
+TOEXEC = 0001 # execute/search by other
+
+#---------------------------------------------------------
+# initialization
+#---------------------------------------------------------
+ENCODING = sys.getfilesystemencoding()
+if ENCODING is None:
+ ENCODING = sys.getdefaultencoding()
#---------------------------------------------------------
# Some useful functions
#---------------------------------------------------------
+
+def stn(s, length):
+ """Convert a python string to a null-terminated string buffer.
+ """
+ return s[:length] + (length - len(s)) * NUL
+
def nts(s):
- """Convert a null-terminated string buffer to a python string.
- """
- return s.split("\0", 1)[0]
-
-def calc_chksum(buf):
- """Calculate the checksum for a member's header. It's a simple addition
- of all bytes, treating the chksum field as if filled with spaces.
- buf is a 512 byte long string buffer which holds the header.
- """
- chk = 256 # chksum field is treated as blanks,
- # so the initial value is 8 * ord(" ")
- for c in buf[:148]: chk += ord(c) # sum up all bytes before chksum
- for c in buf[156:]: chk += ord(c) # sum up all bytes after chksum
- return chk
+ """Convert a null-terminated string field to a python string.
+ """
+ # Use the string up to the first null char.
+ p = s.find("\0")
+ if p == -1:
+ return s
+ return s[:p]
+
+def nti(s):
+ """Convert a number field to a python number.
+ """
+ # There are two possible encodings for a number field, see
+ # itn() below.
+ if s[0] != chr(0200):
+ try:
+ n = int(nts(s) or "0", 8)
+ except ValueError:
+ raise InvalidHeaderError("invalid header")
+ else:
+ n = 0L
+ for i in xrange(len(s) - 1):
+ n <<= 8
+ n += ord(s[i + 1])
+ return n
+
+def itn(n, digits=8, format=DEFAULT_FORMAT):
+ """Convert a python number to a number field.
+ """
+ # POSIX 1003.1-1988 requires numbers to be encoded as a string of
+ # octal digits followed by a null-byte, this allows values up to
+ # (8**(digits-1))-1. GNU tar allows storing numbers greater than
+ # that if necessary. A leading 0200 byte indicates this particular
+ # encoding, the following digits-1 bytes are a big-endian
+ # representation. This allows values up to (256**(digits-1))-1.
+ if 0 <= n < 8 ** (digits - 1):
+ s = "%0*o" % (digits - 1, n) + NUL
+ else:
+ if format != GNU_FORMAT or n >= 256 ** (digits - 1):
+ raise ValueError("overflow in number field")
+
+ if n < 0:
+ # XXX We mimic GNU tar's behaviour with negative numbers,
+ # this could raise OverflowError.
+ n = struct.unpack("L", struct.pack("l", n))[0]
+
+ s = ""
+ for i in xrange(digits - 1):
+ s = chr(n & 0377) + s
+ n >>= 8
+ s = chr(0200) + s
+ return s
+
+def uts(s, encoding, errors):
+ """Convert a unicode object to a string.
+ """
+ if errors == "utf-8":
+ # An extra error handler similar to the -o invalid=UTF-8 option
+ # in POSIX.1-2001. Replace untranslatable characters with their
+ # UTF-8 representation.
+ try:
+ return s.encode(encoding, "strict")
+ except UnicodeEncodeError:
+ x = []
+ for c in s:
+ try:
+ x.append(c.encode(encoding, "strict"))
+ except UnicodeEncodeError:
+ x.append(c.encode("utf8"))
+ return "".join(x)
+ else:
+ return s.encode(encoding, errors)
+
+def calc_chksums(buf):
+ """Calculate the checksum for a member's header by summing up all
+ characters except for the chksum field which is treated as if
+ it was filled with spaces. According to the GNU tar sources,
+ some tars (Sun and NeXT) calculate chksum with signed char,
+ which will be different if there are chars in the buffer with
+ the high bit set. So we calculate two checksums, unsigned and
+ signed.
+ """
+ unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
+ signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
+ return unsigned_chksum, signed_chksum
def copyfileobj(src, dst, length=None):
"""Copy length bytes from fileobj src to fileobj dst.
@@ -186,242 +268,1186 @@
BUFSIZE = 16 * 1024
blocks, remainder = divmod(length, BUFSIZE)
- for b in range(blocks): #@UnusedVariable
+ for b in xrange(blocks):
buf = src.read(BUFSIZE)
if len(buf) < BUFSIZE:
- raise IOError, "end of file reached"
+ raise IOError("end of file reached")
dst.write(buf)
if remainder != 0:
buf = src.read(remainder)
if len(buf) < remainder:
- raise IOError, "end of file reached"
+ raise IOError("end of file reached")
dst.write(buf)
return
filemode_table = (
- (S_IFLNK, "l",
- S_IFREG, "-",
- S_IFBLK, "b",
- S_IFDIR, "d",
- S_IFCHR, "c",
- S_IFIFO, "p"),
- (TUREAD, "r"),
- (TUWRITE, "w"),
- (TUEXEC, "x", TSUID, "S", TUEXEC|TSUID, "s"),
- (TGREAD, "r"),
- (TGWRITE, "w"),
- (TGEXEC, "x", TSGID, "S", TGEXEC|TSGID, "s"),
- (TOREAD, "r"),
- (TOWRITE, "w"),
- (TOEXEC, "x", TSVTX, "T", TOEXEC|TSVTX, "t"))
+ ((S_IFLNK, "l"),
+ (S_IFREG, "-"),
+ (S_IFBLK, "b"),
+ (S_IFDIR, "d"),
+ (S_IFCHR, "c"),
+ (S_IFIFO, "p")),
+
+ ((TUREAD, "r"),),
+ ((TUWRITE, "w"),),
+ ((TUEXEC|TSUID, "s"),
+ (TSUID, "S"),
+ (TUEXEC, "x")),
+
+ ((TGREAD, "r"),),
+ ((TGWRITE, "w"),),
+ ((TGEXEC|TSGID, "s"),
+ (TSGID, "S"),
+ (TGEXEC, "x")),
+
+ ((TOREAD, "r"),),
+ ((TOWRITE, "w"),),
+ ((TOEXEC|TSVTX, "t"),
+ (TSVTX, "T"),
+ (TOEXEC, "x"))
+)
def filemode(mode):
"""Convert a file's mode to a string of the form
-rwxrwxrwx.
Used by TarFile.list()
"""
- s = ""
- for t in filemode_table:
- while 1:
- if mode & t[0] == t[0]:
- s += t[1]
- elif len(t) > 2:
- t = t[2:]
- continue
- else:
- s += "-"
- break
- return s
-
-if os.sep != "/":
- normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
-else:
- normpath = os.path.normpath
+ perm = []
+ for table in filemode_table:
+ for bit, char in table:
+ if mode & bit == bit:
+ perm.append(char)
+ break
+ else:
+ perm.append("-")
+ return "".join(perm)
class TarError(Exception):
- """Internally used exception"""
- pass
-
-#--------------------
-# exported functions
-#--------------------
-def open(name, mode="r", fileobj=None):
- """Open (uncompressed) tar archive name for reading, writing
- or appending.
- """
- return TarFile(name, mode, fileobj)
-
-def gzopen(gzname, gzmode="r", compresslevel=9, fileobj=None):
- """Open gzip compressed tar archive name for reading or writing.
- Appending is not allowed.
- """
- if gzmode == "a":
- raise ValueError, "Appending to gzipped archive is not allowed"
- import gzip
- pre, ext = os.path.splitext(gzname)
- pre = os.path.basename(pre)
- if ext == ".tgz":
- ext = ".tar"
- if ext == ".gz":
- ext = ""
- tarname = pre + ext
- mode = gzmode
- if "b" not in gzmode:
- gzmode += "b"
- if mode[0:1] == "w":
- if not fileobj:
- fileobj = __builtin__.file(gzname, gzmode)
- t = TarFile(tarname, mode, gzip.GzipFile(tarname, gzmode,
- compresslevel, fileobj))
- else:
- t = TarFile(tarname, mode, gzip.open(gzname, gzmode, compresslevel))
- t._extfileobj = 0
- return t
-
-def is_tarfile(name):
- """Return True if name points to a tar archive that we
- are able to handle, else return False.
- """
-
- buftoinfo = TarFile.__dict__["_buftoinfo"]
- try:
- buf = __builtin__.open(name, "rb").read(BLOCKSIZE)
- buftoinfo(None, buf)
- return True
- except (ValueError, ImportError):
- pass
- try:
- import gzip
- buf = gzip.open(name, "rb").read(BLOCKSIZE)
- buftoinfo(None, buf)
- return True
- except (IOError, ValueError, ImportError):
- pass
- return False
+ """Base exception."""
+ pass
+class ExtractError(TarError):
+ """General exception for extract errors."""
+ pass
+class ReadError(TarError):
+ """Exception for unreadable tar archives."""
+ pass
+class CompressionError(TarError):
+ """Exception for unavailable compression methods."""
+ pass
+class StreamError(TarError):
+ """Exception for unsupported operations on stream-like TarFiles."""
+ pass
+class HeaderError(TarError):
+ """Base exception for header errors."""
+ pass
+class EmptyHeaderError(HeaderError):
+ """Exception for empty headers."""
+ pass
+class TruncatedHeaderError(HeaderError):
+ """Exception for truncated headers."""
+ pass
+class EOFHeaderError(HeaderError):
+ """Exception for end of file headers."""
+ pass
+class InvalidHeaderError(HeaderError):
+ """Exception for invalid headers."""
+ pass
+class SubsequentHeaderError(HeaderError):
+ """Exception for missing and invalid extended headers."""
+ pass
+
+#---------------------------
+# internal stream interface
+#---------------------------
+class _LowLevelFile:
+ """Low-level file object. Supports reading and writing.
+ It is used instead of a regular file object for streaming
+ access.
+ """
+
+ def __init__(self, name, mode):
+ mode = {
+ "r": os.O_RDONLY,
+ "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
+ }[mode]
+ if hasattr(os, "O_BINARY"):
+ mode |= os.O_BINARY
+ self.fd = os.open(name, mode, 0666)
+
+ def close(self):
+ os.close(self.fd)
+
+ def read(self, size):
+ return os.read(self.fd, size)
+
+ def write(self, s):
+ os.write(self.fd, s)
+
+class _Stream:
+ """Class that serves as an adapter between TarFile and
+ a stream-like object. The stream-like object only
+ needs to have a read() or write() method and is accessed
+ blockwise. Use of gzip or bzip2 compression is possible.
+ A stream-like object could be for example: sys.stdin,
+ sys.stdout, a socket, a tape device etc.
+
+ _Stream is intended to be used only internally.
+ """
+
+ def __init__(self, name, mode, comptype, fileobj, bufsize):
+ """Construct a _Stream object.
+ """
+ self._extfileobj = True
+ if fileobj is None:
+ fileobj = _LowLevelFile(name, mode)
+ self._extfileobj = False
+
+ if comptype == '*':
+ # Enable transparent compression detection for the
+ # stream interface
+ fileobj = _StreamProxy(fileobj)
+ comptype = fileobj.getcomptype()
+
+ self.name = name or ""
+ self.mode = mode
+ self.comptype = comptype
+ self.fileobj = fileobj
+ self.bufsize = bufsize
+ self.buf = ""
+ self.pos = 0L
+ self.closed = False
+
+ if comptype == "gz":
+ try:
+ import zlib
+ except ImportError:
+ raise CompressionError("zlib module is not available")
+ self.zlib = zlib
+ self.crc = zlib.crc32("") & 0xffffffffL
+ if mode == "r":
+ self._init_read_gz()
+ else:
+ self._init_write_gz()
+
+ if comptype == "bz2":
+ try:
+ import bz2
+ except ImportError:
+ raise CompressionError("bz2 module is not available")
+ if mode == "r":
+ self.dbuf = ""
+ self.cmp = bz2.BZ2Decompressor()
+ else:
+ self.cmp = bz2.BZ2Compressor()
+
+ def __del__(self):
+ if hasattr(self, "closed") and not self.closed:
+ self.close()
+
+ def _init_write_gz(self):
+ """Initialize for writing with gzip compression.
+ """
+ self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
+ -self.zlib.MAX_WBITS,
+ self.zlib.DEF_MEM_LEVEL,
+ 0)
+ timestamp = struct.pack("<L", long(time.time()))
+ self.__write("\037\213\010\010%s\002\377" % timestamp)
+ if self.name.endswith(".gz"):
+ self.name = self.name[:-3]
+ self.__write(self.name + NUL)
+
+ def write(self, s):
+ """Write string s to the stream.
+ """
+ if self.comptype == "gz":
+ self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
+ self.pos += len(s)
+ if self.comptype != "tar":
+ s = self.cmp.compress(s)
+ self.__write(s)
+
+ def __write(self, s):
+ """Write string s to the stream if a whole new block
+ is ready to be written.
+ """
+ self.buf += s
+ while len(self.buf) > self.bufsize:
+ self.fileobj.write(self.buf[:self.bufsize])
+ self.buf = self.buf[self.bufsize:]
+
+ def close(self):
+ """Close the _Stream object. No operation should be
+ done on it afterwards.
+ """
+ if self.closed:
+ return
+
+ if self.mode == "w" and self.comptype != "tar":
+ self.buf += self.cmp.flush()
+
+ if self.mode == "w" and self.buf:
+ self.fileobj.write(self.buf)
+ self.buf = ""
+ if self.comptype == "gz":
+ # The native zlib crc is an unsigned 32-bit integer, but
+ # the Python wrapper implicitly casts that to a signed C
+ # long. So, on a 32-bit box self.crc may "look negative",
+ # while the same crc on a 64-bit box may "look positive".
+ # To avoid irksome warnings from the `struct` module, force
+ # it to look positive on all boxes.
+ self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
+ self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
+
+ if not self._extfileobj:
+ self.fileobj.close()
+
+ self.closed = True
+
+ def _init_read_gz(self):
+ """Initialize for reading a gzip compressed fileobj.
+ """
+ self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
+ self.dbuf = ""
+
+ # taken from gzip.GzipFile with some alterations
+ if self.__read(2) != "\037\213":
+ raise ReadError("not a gzip file")
+ if self.__read(1) != "\010":
+ raise CompressionError("unsupported compression method")
+
+ flag = ord(self.__read(1))
+ self.__read(6)
+
+ if flag & 4:
+ xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
+ self.read(xlen)
+ if flag & 8:
+ while True:
+ s = self.__read(1)
+ if not s or s == NUL:
+ break
+ if flag & 16:
+ while True:
+ s = self.__read(1)
+ if not s or s == NUL:
+ break
+ if flag & 2:
+ self.__read(2)
+
+ def tell(self):
+ """Return the stream's file pointer position.
+ """
+ return self.pos
+
+ def seek(self, pos=0):
+ """Set the stream's file pointer to pos. Negative seeking
+ is forbidden.
+ """
+ if pos - self.pos >= 0:
+ blocks, remainder = divmod(pos - self.pos, self.bufsize)
+ for i in xrange(blocks):
+ self.read(self.bufsize)
+ self.read(remainder)
+ else:
+ raise StreamError("seeking backwards is not allowed")
+ return self.pos
+
+ def read(self, size=None):
+ """Return the next size number of bytes from the stream.
+ If size is not defined, return all bytes of the stream
+ up to EOF.
+ """
+ if size is None:
+ t = []
+ while True:
+ buf = self._read(self.bufsize)
+ if not buf:
+ break
+ t.append(buf)
+ buf = "".join(t)
+ else:
+ buf = self._read(size)
+ self.pos += len(buf)
+ return buf
+
+ def _read(self, size):
+ """Return size bytes from the stream.
+ """
+ if self.comptype == "tar":
+ return self.__read(size)
+
+ c = len(self.dbuf)
+ t = [self.dbuf]
+ while c < size:
+ buf = self.__read(self.bufsize)
+ if not buf:
+ break
+ try:
+ buf = self.cmp.decompress(buf)
+ except IOError:
+ raise ReadError("invalid compressed data")
+ t.append(buf)
+ c += len(buf)
+ t = "".join(t)
+ self.dbuf = t[size:]
+ return t[:size]
+
+ def __read(self, size):
+ """Return size bytes from stream. If internal buffer is empty,
+ read another block from the stream.
+ """
+ c = len(self.buf)
+ t = [self.buf]
+ while c < size:
+ buf = self.fileobj.read(self.bufsize)
+ if not buf:
+ break
+ t.append(buf)
+ c += len(buf)
+ t = "".join(t)
+ self.buf = t[size:]
+ return t[:size]
+# class _Stream
+
+class _StreamProxy(object):
+ """Small proxy class that enables transparent compression
+ detection for the Stream interface (mode 'r|*').
+ """
+
+ def __init__(self, fileobj):
+ self.fileobj = fileobj
+ self.buf = self.fileobj.read(BLOCKSIZE)
+
+ def read(self, size):
+ self.read = self.fileobj.read
+ return self.buf
+
+ def getcomptype(self):
+ if self.buf.startswith("\037\213\010"):
+ return "gz"
+ if self.buf.startswith("BZh91"):
+ return "bz2"
+ return "tar"
+
+ def close(self):
+ self.fileobj.close()
+# class StreamProxy
+
+class _BZ2Proxy(object):
+ """Small proxy class that enables external file object
+ support for "r:bz2" and "w:bz2" modes. This is actually
+ a workaround for a limitation in bz2 module's BZ2File
+ class which (unlike gzip.GzipFile) has no support for
+ a file object argument.
+ """
+
+ blocksize = 16 * 1024
+
+ def __init__(self, fileobj, mode):
+ self.fileobj = fileobj
+ self.mode = mode
+ self.name = getattr(self.fileobj, "name", None)
+ self.init()
+
+ def init(self):
+ import bz2
+ self.pos = 0
+ if self.mode == "r":
+ self.bz2obj = bz2.BZ2Decompressor()
+ self.fileobj.seek(0)
+ self.buf = ""
+ else:
+ self.bz2obj = bz2.BZ2Compressor()
+
+ def read(self, size):
+ b = [self.buf]
+ x = len(self.buf)
+ while x < size:
+ raw = self.fileobj.read(self.blocksize)
+ if not raw:
+ break
+ data = self.bz2obj.decompress(raw)
+ b.append(data)
+ x += len(data)
+ self.buf = "".join(b)
+
+ buf = self.buf[:size]
+ self.buf = self.buf[size:]
+ self.pos += len(buf)
+ return buf
+
+ def seek(self, pos):
+ if pos < self.pos:
+ self.init()
+ self.read(pos - self.pos)
+
+ def tell(self):
+ return self.pos
+
+ def write(self, data):
+ self.pos += len(data)
+ raw = self.bz2obj.compress(data)
+ self.fileobj.write(raw)
+
+ def close(self):
+ if self.mode == "w":
+ raw = self.bz2obj.flush()
+ self.fileobj.write(raw)
+# class _BZ2Proxy
+
+#------------------------
+# Extraction file object
+#------------------------
+class _FileInFile(object):
+ """A thin wrapper around an existing file object that
+ provides a part of its data as an individual file
+ object.
+ """
+
+ def __init__(self, fileobj, offset, size, sparse=None):
+ self.fileobj = fileobj
+ self.offset = offset
+ self.size = size
+ self.sparse = sparse
+ self.position = 0
+
+ def tell(self):
+ """Return the current file position.
+ """
+ return self.position
+
+ def seek(self, position):
+ """Seek to a position in the file.
+ """
+ self.position = position
+
+ def read(self, size=None):
+ """Read data from the file.
+ """
+ if size is None:
+ size = self.size - self.position
+ else:
+ size = min(size, self.size - self.position)
+
+ if self.sparse is None:
+ return self.readnormal(size)
+ else:
+ return self.readsparse(size)
+
+ def readnormal(self, size):
+ """Read operation for regular files.
+ """
+ self.fileobj.seek(self.offset + self.position)
+ self.position += size
+ return self.fileobj.read(size)
+
+ def readsparse(self, size):
+ """Read operation for sparse files.
+ """
+ data = []
+ while size > 0:
+ buf = self.readsparsesection(size)
+ if not buf:
+ break
+ size -= len(buf)
+ data.append(buf)
+ return "".join(data)
+
+ def readsparsesection(self, size):
+ """Read a single section of a sparse file.
+ """
+ section = self.sparse.find(self.position)
+
+ if section is None:
+ return ""
+
+ size = min(size, section.offset + section.size - self.position)
+
+ if isinstance(section, _data):
+ realpos = section.realpos + self.position - section.offset
+ self.fileobj.seek(self.offset + realpos)
+ self.position += size
+ return self.fileobj.read(size)
+ else:
+ self.position += size
+ return NUL * size
+#class _FileInFile
+
+
+class ExFileObject(object):
+ """File-like object for reading an archive member.
+ Is returned by TarFile.extractfile().
+ """
+ blocksize = 1024
+
+ def __init__(self, tarfile, tarinfo):
+ self.fileobj = _FileInFile(tarfile.fileobj,
+ tarinfo.offset_data,
+ tarinfo.size,
+ getattr(tarinfo, "sparse", None))
+ self.name = tarinfo.name
+ self.mode = "r"
+ self.closed = False
+ self.size = tarinfo.size
+
+ self.position = 0
+ self.buffer = ""
+
+ def read(self, size=None):
+ """Read at most size bytes from the file. If size is not
+ present or None, read all data until EOF is reached.
+ """
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+
+ buf = ""
+ if self.buffer:
+ if size is None:
+ buf = self.buffer
+ self.buffer = ""
+ else:
+ buf = self.buffer[:size]
+ self.buffer = self.buffer[size:]
+
+ if size is None:
+ buf += self.fileobj.read()
+ else:
+ buf += self.fileobj.read(size - len(buf))
+
+ self.position += len(buf)
+ return buf
+
+ def readline(self, size=-1):
+ """Read one entire line from the file. If size is present
+ and non-negative, return a string with at most that
+ size, which may be an incomplete line.
+ """
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+
+ if "\n" in self.buffer:
+ pos = self.buffer.find("\n") + 1
+ else:
+ buffers = [self.buffer]
+ while True:
+ buf = self.fileobj.read(self.blocksize)
+ buffers.append(buf)
+ if not buf or "\n" in buf:
+ self.buffer = "".join(buffers)
+ pos = self.buffer.find("\n") + 1
+ if pos == 0:
+ # no newline found.
+ pos = len(self.buffer)
+ break
+
+ if size != -1:
+ pos = min(size, pos)
+
+ buf = self.buffer[:pos]
+ self.buffer = self.buffer[pos:]
+ self.position += len(buf)
+ return buf
+
+ def readlines(self):
+ """Return a list with all remaining lines.
+ """
+ result = []
+ while True:
+ line = self.readline()
+ if not line: break
+ result.append(line)
+ return result
+
+ def tell(self):
+ """Return the current file position.
+ """
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+
+ return self.position
+
+ def seek(self, pos, whence=0):
+ """Seek to a position in the file.
+ """
+ if self.closed:
+ raise ValueError("I/O operation on closed file")
+
+ if whence == 0:
+ self.position = min(max(pos, 0), self.size)
+ elif whence == 1:
+ if pos < 0:
+ self.position = max(self.position + pos, 0)
+ else:
+ self.position = min(self.position + pos, self.size)
+ elif whence == 2:
+ self.position = max(min(self.size + pos, self.size), 0)
+ else:
+ raise ValueError("Invalid argument")
+
+ self.buffer = ""
+ self.fileobj.seek(self.position)
+
+ def close(self):
+ """Close the file object.
+ """
+ self.closed = True
+
+ def __iter__(self):
+ """Get an iterator over the file's lines.
+ """
+ while True:
+ line = self.readline()
+ if not line:
+ break
+ yield line
+#class ExFileObject
#------------------
# Exported Classes
#------------------
-class TarInfo:
+class TarInfo(object):
"""Informational class which holds the details about an
archive member given by a tar header block.
- TarInfo instances are returned by TarFile.getmember() and
- TarFile.getmembers() and are usually created internally.
- If you want to create a TarInfo instance from the outside,
- you should use TarFile.gettarinfo() if the file already exists,
- or you can instanciate the class yourself.
+ TarInfo objects are returned by TarFile.getmember(),
+ TarFile.getmembers() and TarFile.gettarinfo() and are
+ usually created internally.
"""
def __init__(self, name=""):
- """Construct a TarInfo instance. name is the optional name
+ """Construct a TarInfo object. name is the optional name
of the member.
"""
-
- self.name = name # member name (dirnames must end with '/')
- self.mode = 0100666 # file permissions
- self.uid = 0 # user id
- self.gid = 0 # group id
- self.size = 0 # file size
- self.mtime = 0 # modification time
- self.chksum = 0 # header checksum
- self.type = REGTYPE # member type
- self.linkname = "" # link name
- self.uname = "user" # user name
- self.gname = "group" # group name
- self.devmajor = 0 #-
- self.devminor = 0 #-for use with CHRTYPE and BLKTYPE
- self.prefix = "" # prefix, holding information
-# # about sparse files
-
- self.offset = 0 # the tar header starts here
- self.offset_data = 0 # the optional file's data starts here
-
- def init_from_stat(self, statres):
- """Initialize various attributes from statobj (these are
- returned by os.stat() and related functions. Return none on error"""
- stmd = statres.st_mode
- if stat.S_ISREG(stmd): type = REGTYPE
- elif stat.S_ISDIR(stmd):
- type = DIRTYPE
- if self.name[-1:] != "/": self.name += "/"
- elif stat.S_ISFIFO(stmd): type = FIFOTYPE
- elif stat.S_ISLNK(stmd): type = SYMTYPE
- elif stat.S_ISCHR(stmd): type = CHRTYPE
- elif stat.S_ISBLK(stmd): type = BLKTYPE
- else: return None
-
- # Fill the TarInfo instance with all
- # information we can get.
- self.mode = stat.S_IMODE(stmd)
- self.uid = statres.st_uid
- self.gid = statres.st_gid
- self.size = statres.st_size
- self.mtime = statres.st_mtime
- self.type = type
- if pwd:
- try: self.uname = uid2uname(self.uid)
- except KeyError: pass
- if grp:
- try: self.gname = gid2gname(self.gid)
- except KeyError: pass
-
- if type in (CHRTYPE, BLKTYPE):
- if hasattr(os, "major") and hasattr(os, "minor"):
- self.devmajor = os.major(statres.st_rdev)
- self.devminor = os.minor(statres.st_rdev)
- return 1
-
- def set_arcname(self, name):
- """Set the name of the member in the archive. Backward
- slashes are converted to forward slashes, Absolute paths are
- turned to relative paths.
- """
- arcname = normpath(name)
- drv, arcname = os.path.splitdrive(arcname) #@UnusedVariable
- while arcname[0:1] == "/":
- arcname = arcname[1:]
- self.name = arcname
-
- def getheader(self):
- """Return a tar header block as a 512 byte string.
- """
- if self.uid > 2097151 or self.uid < 0:
- sys.stderr.write("uid %i of file %s not in range. Setting uid to 60001\n" % (self.uid,self.name))
- self.uid = 60001
- if self.gid > 2097151 or self.gid < 0:
- sys.stderr.write("gid %i of file %s not in range. Setting gid to 60001\n" % (self.gid, self.name))
- self.gid = 60001
- # The following code was contributed by Detlef Lannert.
- parts = []
- for value, fieldsize in (
- (self.name, 100),
- ("%07o" % self.mode, 8),
- ("%07o" % self.uid, 8),
- ("%07o" % self.gid, 8),
- ("%011o" % self.size, 12),
- ("%011o" % self.mtime, 12),
- (" ", 8),
- (self.type, 1),
- (self.linkname, 100),
- (MAGIC, 6),
- (VERSION, 2),
- (self.uname, 32),
- (self.gname, 32),
- ("%07o" % self.devmajor, 8),
- ("%07o" % self.devminor, 8),
- (self.prefix, 155)
- ):
- l = len(value)
- parts.append(value + (fieldsize - l) * "\0")
-
- buf = "".join(parts)
- chksum = calc_chksum(buf)
- buf = buf[:148] + "%06o\0" % chksum + buf[155:]
- buf += (512 - len(buf)) * "\0"
- self.buf = buf
+ self.name = name # member name
+ self.mode = 0644 # file permissions
+ self.uid = 0 # user id
+ self.gid = 0 # group id
+ self.size = 0 # file size
+ self.mtime = 0 # modification time
+ self.chksum = 0 # header checksum
+ self.type = REGTYPE # member type
+ self.linkname = "" # link name
+ self.uname = "" # user name
+ self.gname = "" # group name
+ self.devmajor = 0 # device major number
+ self.devminor = 0 # device minor number
+
+ self.offset = 0 # the tar header starts here
+ self.offset_data = 0 # the file's data starts here
+
+ self.pax_headers = {} # pax header information
+
+ # In pax headers the "name" and "linkname" field are called
+ # "path" and "linkpath".
+ def _getpath(self):
+ return self.name
+ def _setpath(self, name):
+ self.name = name
+ path = property(_getpath, _setpath)
+
+ def _getlinkpath(self):
+ return self.linkname
+ def _setlinkpath(self, linkname):
+ self.linkname = linkname
+ linkpath = property(_getlinkpath, _setlinkpath)
+
+ def __repr__(self):
+ return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
+
+ def get_info(self, encoding, errors):
+ """Return the TarInfo's attributes as a dictionary.
+ """
+ info = {
+ "name": self.name,
+ "mode": self.mode & 07777,
+ "uid": self.uid,
+ "gid": self.gid,
+ "size": self.size,
+ "mtime": self.mtime,
+ "chksum": self.chksum,
+ "type": self.type,
+ "linkname": self.linkname,
+ "uname": self.uname,
+ "gname": self.gname,
+ "devmajor": self.devmajor,
+ "devminor": self.devminor
+ }
+
+ if info["type"] == DIRTYPE and not info["name"].endswith("/"):
+ info["name"] += "/"
+
+ for key in ("name", "linkname", "uname", "gname"):
+ if type(info[key]) is unicode:
+ info[key] = info[key].encode(encoding, errors)
+
+ return info
+
+ def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
+ """Return a tar header as a string of 512 byte blocks.
+ """
+ info = self.get_info(encoding, errors)
+
+ if format == USTAR_FORMAT:
+ return self.create_ustar_header(info)
+ elif format == GNU_FORMAT:
+ return self.create_gnu_header(info)
+ elif format == PAX_FORMAT:
+ return self.create_pax_header(info, encoding, errors)
+ else:
+ raise ValueError("invalid format")
+
+ def create_ustar_header(self, info):
+ """Return the object as a ustar header block.
+ """
+ info["magic"] = POSIX_MAGIC
+
+ if len(info["linkname"]) > LENGTH_LINK:
+ raise ValueError("linkname is too long")
+
+ if len(info["name"]) > LENGTH_NAME:
+ info["prefix"], info["name"] = self._posix_split_name(info["name"])
+
+ return self._create_header(info, USTAR_FORMAT)
+
+ def create_gnu_header(self, info):
+ """Return the object as a GNU header block sequence.
+ """
+ info["magic"] = GNU_MAGIC
+
+ buf = ""
+ if len(info["linkname"]) > LENGTH_LINK:
+ buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
+
+ if len(info["name"]) > LENGTH_NAME:
+ buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
+
+ return buf + self._create_header(info, GNU_FORMAT)
+
+ def create_pax_header(self, info, encoding, errors):
+ """Return the object as a ustar header block. If it cannot be
+ represented this way, prepend a pax extended header sequence
+        with supplemental information.
+ """
+ info["magic"] = POSIX_MAGIC
+ pax_headers = self.pax_headers.copy()
+
+ # Test string fields for values that exceed the field length or cannot
+ # be represented in ASCII encoding.
+ for name, hname, length in (
+ ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
+ ("uname", "uname", 32), ("gname", "gname", 32)):
+
+ if hname in pax_headers:
+ # The pax header has priority.
+ continue
+
+ val = info[name].decode(encoding, errors)
+
+ # Try to encode the string as ASCII.
+ try:
+ val.encode("ascii")
+ except UnicodeEncodeError:
+ pax_headers[hname] = val
+ continue
+
+ if len(info[name]) > length:
+ pax_headers[hname] = val
+
+ # Test number fields for values that exceed the field limit or values
+        # that need to be stored as float.
+ for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
+ if name in pax_headers:
+ # The pax header has priority. Avoid overflow.
+ info[name] = 0
+ continue
+
+ val = info[name]
+ if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
+ pax_headers[name] = unicode(val)
+ info[name] = 0
+
+ # Create a pax extended header if necessary.
+ if pax_headers:
+ buf = self._create_pax_generic_header(pax_headers)
+ else:
+ buf = ""
+
+ return buf + self._create_header(info, USTAR_FORMAT)
+
+ @classmethod
+ def create_pax_global_header(cls, pax_headers):
+ """Return the object as a pax global header block sequence.
+ """
+ return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
+
+ def _posix_split_name(self, name):
+ """Split a name longer than 100 chars into a prefix
+ and a name part.
+ """
+ prefix = name[:LENGTH_PREFIX + 1]
+ while prefix and prefix[-1] != "/":
+ prefix = prefix[:-1]
+
+ name = name[len(prefix):]
+ prefix = prefix[:-1]
+
+ if not prefix or len(name) > LENGTH_NAME:
+ raise ValueError("name is too long")
+ return prefix, name
+
+ @staticmethod
+ def _create_header(info, format):
+ """Return a header block. info is a dictionary with file
+ information, format must be one of the *_FORMAT constants.
+ """
+ parts = [
+ stn(info.get("name", ""), 100),
+ itn(info.get("mode", 0) & 07777, 8, format),
+ itn(info.get("uid", 0), 8, format),
+ itn(info.get("gid", 0), 8, format),
+ itn(info.get("size", 0), 12, format),
+ itn(info.get("mtime", 0), 12, format),
+ " ", # checksum field
+ info.get("type", REGTYPE),
+ stn(info.get("linkname", ""), 100),
+ stn(info.get("magic", POSIX_MAGIC), 8),
+ stn(info.get("uname", ""), 32),
+ stn(info.get("gname", ""), 32),
+ itn(info.get("devmajor", 0), 8, format),
+ itn(info.get("devminor", 0), 8, format),
+ stn(info.get("prefix", ""), 155)
+ ]
+
+ buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
+ chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
+ buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
return buf
+ @staticmethod
+ def _create_payload(payload):
+ """Return the string payload filled with zero bytes
+ up to the next 512 byte border.
+ """
+ blocks, remainder = divmod(len(payload), BLOCKSIZE)
+ if remainder > 0:
+ payload += (BLOCKSIZE - remainder) * NUL
+ return payload
+
+ @classmethod
+ def _create_gnu_long_header(cls, name, type):
+ """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
+ for name.
+ """
+ name += NUL
+
+ info = {}
+ info["name"] = "././@LongLink"
+ info["type"] = type
+ info["size"] = len(name)
+ info["magic"] = GNU_MAGIC
+
+ # create extended header + name blocks.
+ return cls._create_header(info, USTAR_FORMAT) + \
+ cls._create_payload(name)
+
+ @classmethod
+ def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
+ """Return a POSIX.1-2001 extended or global header sequence
+ that contains a list of keyword, value pairs. The values
+ must be unicode objects.
+ """
+ records = []
+ for keyword, value in pax_headers.iteritems():
+ keyword = keyword.encode("utf8")
+ value = value.encode("utf8")
+ l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
+ n = p = 0
+ while True:
+ n = l + len(str(p))
+ if n == p:
+ break
+ p = n
+ records.append("%d %s=%s\n" % (p, keyword, value))
+ records = "".join(records)
+
+ # We use a hardcoded "././@PaxHeader" name like star does
+ # instead of the one that POSIX recommends.
+ info = {}
+ info["name"] = "././@PaxHeader"
+ info["type"] = type
+ info["size"] = len(records)
+ info["magic"] = POSIX_MAGIC
+
+ # Create pax header + record blocks.
+ return cls._create_header(info, USTAR_FORMAT) + \
+ cls._create_payload(records)
+
+ @classmethod
+ def frombuf(cls, buf):
+ """Construct a TarInfo object from a 512 byte string buffer.
+ """
+ if len(buf) == 0:
+ raise EmptyHeaderError("empty header")
+ if len(buf) != BLOCKSIZE:
+ raise TruncatedHeaderError("truncated header")
+ if buf.count(NUL) == BLOCKSIZE:
+ raise EOFHeaderError("end of file header")
+
+ chksum = nti(buf[148:156])
+ if chksum not in calc_chksums(buf):
+ raise InvalidHeaderError("bad checksum")
+
+ obj = cls()
+ obj.buf = buf
+ obj.name = nts(buf[0:100])
+ obj.mode = nti(buf[100:108])
+ obj.uid = nti(buf[108:116])
+ obj.gid = nti(buf[116:124])
+ obj.size = nti(buf[124:136])
+ obj.mtime = nti(buf[136:148])
+ obj.chksum = chksum
+ obj.type = buf[156:157]
+ obj.linkname = nts(buf[157:257])
+ obj.uname = nts(buf[265:297])
+ obj.gname = nts(buf[297:329])
+ obj.devmajor = nti(buf[329:337])
+ obj.devminor = nti(buf[337:345])
+ prefix = nts(buf[345:500])
+
+ # Old V7 tar format represents a directory as a regular
+ # file with a trailing slash.
+ if obj.type == AREGTYPE and obj.name.endswith("/"):
+ obj.type = DIRTYPE
+
+ # Remove redundant slashes from directories.
+ if obj.isdir():
+ obj.name = obj.name.rstrip("/")
+
+ # Reconstruct a ustar longname.
+ if prefix and obj.type not in GNU_TYPES:
+ obj.name = prefix + "/" + obj.name
+ return obj
+
+ @classmethod
+ def fromtarfile(cls, tarfile):
+ """Return the next TarInfo object from TarFile object
+ tarfile.
+ """
+ buf = tarfile.fileobj.read(BLOCKSIZE)
+ obj = cls.frombuf(buf)
+ obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
+ return obj._proc_member(tarfile)
+
+ #--------------------------------------------------------------------------
+ # The following are methods that are called depending on the type of a
+ # member. The entry point is _proc_member() which can be overridden in a
+ # subclass to add custom _proc_*() methods. A _proc_*() method MUST
+ # implement the following
+ # operations:
+ # 1. Set self.offset_data to the position where the data blocks begin,
+ # if there is data that follows.
+ # 2. Set tarfile.offset to the position where the next member's header will
+ # begin.
+ # 3. Return self or another valid TarInfo object.
+ def _proc_member(self, tarfile):
+ """Choose the right processing method depending on
+ the type and call it.
+ """
+ if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
+ return self._proc_gnulong(tarfile)
+ elif self.type == GNUTYPE_SPARSE:
+ return self._proc_sparse(tarfile)
+ elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
+ return self._proc_pax(tarfile)
+ else:
+ return self._proc_builtin(tarfile)
+
+ def _proc_builtin(self, tarfile):
+ """Process a builtin type or an unknown type which
+ will be treated as a regular file.
+ """
+ self.offset_data = tarfile.fileobj.tell()
+ offset = self.offset_data
+ if self.isreg() or self.type not in SUPPORTED_TYPES:
+ # Skip the following data blocks.
+ offset += self._block(self.size)
+ tarfile.offset = offset
+
+ # Patch the TarInfo object with saved global
+ # header information.
+ self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
+
+ return self
+
+ def _proc_gnulong(self, tarfile):
+ """Process the blocks that hold a GNU longname
+ or longlink member.
+ """
+ buf = tarfile.fileobj.read(self._block(self.size))
+
+ # Fetch the next header and process it.
+ try:
+ next = self.fromtarfile(tarfile)
+ except HeaderError:
+ raise SubsequentHeaderError("missing or bad subsequent header")
+
+ # Patch the TarInfo object from the next header with
+ # the longname information.
+ next.offset = self.offset
+ if self.type == GNUTYPE_LONGNAME:
+ next.name = nts(buf)
+ elif self.type == GNUTYPE_LONGLINK:
+ next.linkname = nts(buf)
+
+ return next
+
+ def _proc_sparse(self, tarfile):
+ """Process a GNU sparse header plus extra headers.
+ """
+ buf = self.buf
+ sp = _ringbuffer()
+ pos = 386
+ lastpos = 0L
+ realpos = 0L
+ # There are 4 possible sparse structs in the
+ # first header.
+ for i in xrange(4):
+ try:
+ offset = nti(buf[pos:pos + 12])
+ numbytes = nti(buf[pos + 12:pos + 24])
+ except ValueError:
+ break
+ if offset > lastpos:
+ sp.append(_hole(lastpos, offset - lastpos))
+ sp.append(_data(offset, numbytes, realpos))
+ realpos += numbytes
+ lastpos = offset + numbytes
+ pos += 24
+
+ isextended = ord(buf[482])
+ origsize = nti(buf[483:495])
+
+ # If the isextended flag is given,
+ # there are extra headers to process.
+ while isextended == 1:
+ buf = tarfile.fileobj.read(BLOCKSIZE)
+ pos = 0
+ for i in xrange(21):
+ try:
+ offset = nti(buf[pos:pos + 12])
+ numbytes = nti(buf[pos + 12:pos + 24])
+ except ValueError:
+ break
+ if offset > lastpos:
+ sp.append(_hole(lastpos, offset - lastpos))
+ sp.append(_data(offset, numbytes, realpos))
+ realpos += numbytes
+ lastpos = offset + numbytes
+ pos += 24
+ isextended = ord(buf[504])
+
+ if lastpos < origsize:
+ sp.append(_hole(lastpos, origsize - lastpos))
+
+ self.sparse = sp
+
+ self.offset_data = tarfile.fileobj.tell()
+ tarfile.offset = self.offset_data + self._block(self.size)
+ self.size = origsize
+
+ return self
+
+ def _proc_pax(self, tarfile):
+ """Process an extended or global header as described in
+ POSIX.1-2001.
+ """
+ # Read the header information.
+ buf = tarfile.fileobj.read(self._block(self.size))
+
+ # A pax header stores supplemental information for either
+ # the following file (extended) or all following files
+ # (global).
+ if self.type == XGLTYPE:
+ pax_headers = tarfile.pax_headers
+ else:
+ pax_headers = tarfile.pax_headers.copy()
+
+ # Parse pax header information. A record looks like that:
+ # "%d %s=%s\n" % (length, keyword, value). length is the size
+ # of the complete record including the length field itself and
+ # the newline. keyword and value are both UTF-8 encoded strings.
+ regex = re.compile(r"(\d+) ([^=]+)=", re.U)
+ pos = 0
+ while True:
+ match = regex.match(buf, pos)
+ if not match:
+ break
+
+ length, keyword = match.groups()
+ length = int(length)
+ value = buf[match.end(2) + 1:match.start(1) + length - 1]
+
+ keyword = keyword.decode("utf8")
+ value = value.decode("utf8")
+
+ pax_headers[keyword] = value
+ pos += length
+
+ # Fetch the next header.
+ try:
+ next = self.fromtarfile(tarfile)
+ except HeaderError:
+ raise SubsequentHeaderError("missing or bad subsequent header")
+
+ if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
+ # Patch the TarInfo object with the extended header info.
+ next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
+ next.offset = self.offset
+
+ if "size" in pax_headers:
+ # If the extended header replaces the size field,
+ # we need to recalculate the offset where the next
+ # header starts.
+ offset = next.offset_data
+ if next.isreg() or next.type not in SUPPORTED_TYPES:
+ offset += next._block(next.size)
+ tarfile.offset = offset
+
+ return next
+
+ def _apply_pax_info(self, pax_headers, encoding, errors):
+ """Replace fields with supplemental information from a previous
+ pax extended or global header.
+ """
+ for keyword, value in pax_headers.iteritems():
+ if keyword not in PAX_FIELDS:
+ continue
+
+ if keyword == "path":
+ value = value.rstrip("/")
+
+ if keyword in PAX_NUMBER_FIELDS:
+ try:
+ value = PAX_NUMBER_FIELDS[keyword](value)
+ except ValueError:
+ value = 0
+ else:
+ value = uts(value, encoding, errors)
+
+ setattr(self, keyword, value)
+
+ self.pax_headers = pax_headers.copy()
+
+ def _block(self, count):
+ """Round up a byte count by BLOCKSIZE and return it,
+ e.g. _block(834) => 1024.
+ """
+ blocks, remainder = divmod(count, BLOCKSIZE)
+ if remainder:
+ blocks += 1
+ return blocks * BLOCKSIZE
+
def isreg(self):
return self.type in REGULAR_TYPES
def isfile(self):
@@ -444,10 +1470,10 @@
return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
-
-class TarFile:
- """Class representing a TAR archive file on disk.
+class TarFile(object):
+ """The TarFile Class provides an interface to tar archives.
"""
+
debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
dereference = False # If true, add content of linked file to the
@@ -456,235 +1482,459 @@
ignore_zeros = False # If true, skips empty or invalid blocks and
# continues processing.
- errorlevel = 0 # If 0, fatal errors only appear in debug
+ errorlevel = 1 # If 0, fatal errors only appear in debug
# messages (if debug >= 0). If > 0, errors
# are passed to the caller as exceptions.
- def __init__(self, name=None, mode="r", fileobj=None):
- self.name = name
-
+ format = DEFAULT_FORMAT # The format to use when creating an archive.
+
+ encoding = ENCODING # Encoding for 8-bit character strings.
+
+ errors = None # Error handler for unicode conversion.
+
+ tarinfo = TarInfo # The default TarInfo class to use.
+
+ fileobject = ExFileObject # The default ExFileObject class to use.
+
+ def __init__(self, name=None, mode="r", fileobj=None, format=None,
+ tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
+ errors=None, pax_headers=None, debug=None, errorlevel=None):
+ """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
+ read from an existing archive, 'a' to append data to an existing
+ file or 'w' to create a new file overwriting an existing one. `mode'
+ defaults to 'r'.
+ If `fileobj' is given, it is used for reading or writing data. If it
+ can be determined, `mode' is overridden by `fileobj's mode.
+           `fileobj' is not closed when TarFile is closed.
+ """
if len(mode) > 1 or mode not in "raw":
- raise ValueError, "mode must be either 'r', 'a' or 'w', " \
- "not '%s'" % mode
- self._mode = mode
- self.mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
+ raise ValueError("mode must be 'r', 'a' or 'w'")
+ self.mode = mode
+ self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
if not fileobj:
- fileobj = __builtin__.file(self.name, self.mode)
- self._extfileobj = 0
+ if self.mode == "a" and not os.path.exists(name):
+ # Create nonexistent files in append mode.
+ self.mode = "w"
+ self._mode = "wb"
+ fileobj = bltn_open(name, self._mode)
+ self._extfileobj = False
else:
- if self.name is None and hasattr(fileobj, "name"):
- self.name = fileobj.name
+ if name is None and hasattr(fileobj, "name"):
+ name = fileobj.name
if hasattr(fileobj, "mode"):
- self.mode = fileobj.mode
- self._extfileobj = 1
+ self._mode = fileobj.mode
+ self._extfileobj = True
+ if name:
+ self.name = os.path.abspath(name)
+ else:
+ self.name = None
self.fileobj = fileobj
- self.init_datastructures()
-
- if self._mode == "a":
- self.fileobj.seek(0)
- self._load()
-
- def init_datastructures(self):
- # Init datastructures
- #self.members = [] # list of members as TarInfo instances
- #self.membernames = [] # names of members
- #self.chunks = [0] # chunk cache
- self._loaded = 0 # flag if all members have been read
- self.offset = 0l # current position in the archive file
- self.inodes = {} # dictionary caching the inodes of
- # archive members already added
- self.next_chunk = 0 # offset of next tarinfo, used when reading
-
- def close(self):
- """Close the TarFile instance and do some cleanup.
- """
- if self.fileobj:
- if self._mode in "aw":
- # fill up the end with zero-blocks
- # (like option -b20 for tar does)
- blocks, remainder = divmod(self.offset, RECORDSIZE) #@UnusedVariable
- if remainder > 0:
- self.fileobj.write("\0" * (RECORDSIZE - remainder))
-
+ # Init attributes.
+ if format is not None:
+ self.format = format
+ if tarinfo is not None:
+ self.tarinfo = tarinfo
+ if dereference is not None:
+ self.dereference = dereference
+ if ignore_zeros is not None:
+ self.ignore_zeros = ignore_zeros
+ if encoding is not None:
+ self.encoding = encoding
+
+ if errors is not None:
+ self.errors = errors
+ elif mode == "r":
+ self.errors = "utf-8"
+ else:
+ self.errors = "strict"
+
+ if pax_headers is not None and self.format == PAX_FORMAT:
+ self.pax_headers = pax_headers
+ else:
+ self.pax_headers = {}
+
+ if debug is not None:
+ self.debug = debug
+ if errorlevel is not None:
+ self.errorlevel = errorlevel
+
+ # Init datastructures.
+ self.closed = False
+ self.members = [] # list of members as TarInfo objects
+ self._loaded = False # flag if all members have been read
+ self.offset = self.fileobj.tell()
+ # current position in the archive file
+ self.inodes = {} # dictionary caching the inodes of
+ # archive members already added
+
+ try:
+ if self.mode == "r":
+ self.firstmember = None
+ self.firstmember = self.next()
+
+ if self.mode == "a":
+ # Move to the end of the archive,
+ # before the first empty block.
+ while True:
+ self.fileobj.seek(self.offset)
+ try:
+ tarinfo = self.tarinfo.fromtarfile(self)
+ self.members.append(tarinfo)
+ except EOFHeaderError:
+ self.fileobj.seek(self.offset)
+ break
+ except HeaderError, e:
+ raise ReadError(str(e))
+
+ if self.mode in "aw":
+ self._loaded = True
+
+ if self.pax_headers:
+ buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
+ self.fileobj.write(buf)
+ self.offset += len(buf)
+ except:
if not self._extfileobj:
self.fileobj.close()
- self.fileobj = None
-
- def throwaway_until(self, position):
- """Read data, throwing it away until we get to position"""
- bufsize = 16 * 1024
- bytes_to_read = position - self.offset
- assert bytes_to_read >= 0
- while bytes_to_read >= bufsize:
- self.fileobj.read(bufsize)
- bytes_to_read -= bufsize
- self.fileobj.read(bytes_to_read)
- self.offset = position
-
- def next(self):
- """Return the next member from the archive.
- Return None if the end is reached.
- Can be used in a while statement, is used
- for Iteration (see __iter__()) and internally.
+ self.closed = True
+ raise
+
+ def _getposix(self):
+ return self.format == USTAR_FORMAT
+ def _setposix(self, value):
+ import warnings
+ warnings.warn("use the format attribute instead", DeprecationWarning,
+ 2)
+ if value:
+ self.format = USTAR_FORMAT
+ else:
+ self.format = GNU_FORMAT
+ posix = property(_getposix, _setposix)
+
+ #--------------------------------------------------------------------------
+ # Below are the classmethods which act as alternate constructors to the
+ # TarFile class. The open() method is the only one that is needed for
+ # public use; it is the "super"-constructor and is able to select an
+ # adequate "sub"-constructor for a particular compression using the mapping
+ # from OPEN_METH.
+ #
+ # This concept allows one to subclass TarFile without losing the comfort of
+ # the super-constructor. A sub-constructor is registered and made available
+ # by adding it to the mapping in OPEN_METH.
+
+ @classmethod
+ def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
+ """Open a tar archive for reading, writing or appending. Return
+ an appropriate TarFile class.
+
+ mode:
+ 'r' or 'r:*' open for reading with transparent compression
+ 'r:' open for reading exclusively uncompressed
+ 'r:gz' open for reading with gzip compression
+ 'r:bz2' open for reading with bzip2 compression
+ 'a' or 'a:' open for appending, creating the file if necessary
+ 'w' or 'w:' open for writing without compression
+ 'w:gz' open for writing with gzip compression
+ 'w:bz2' open for writing with bzip2 compression
+
+ 'r|*' open a stream of tar blocks with transparent compression
+ 'r|' open an uncompressed stream of tar blocks for reading
+ 'r|gz' open a gzip compressed stream of tar blocks
+ 'r|bz2' open a bzip2 compressed stream of tar blocks
+ 'w|' open an uncompressed stream for writing
+ 'w|gz' open a gzip compressed stream for writing
+ 'w|bz2' open a bzip2 compressed stream for writing
"""
- if not self.fileobj:
- raise ValueError, "I/O operation on closed file"
- if self._mode not in "ra":
- raise ValueError, "reading from a write-mode file"
-
- # Read the next block.
- # self.fileobj.seek(self.chunks[-1])
- #self.fileobj.seek(self.next_chunk)
- #self.offset = self.next_chunk
- self.throwaway_until(self.next_chunk)
- while 1:
- buf = self.fileobj.read(BLOCKSIZE)
- if not buf:
- return None
- try:
- tarinfo = self._buftoinfo(buf)
- except ValueError:
- if self.ignore_zeros:
- if buf.count("\0") == BLOCKSIZE:
- adj = "empty"
- else:
- adj = "invalid"
- self._dbg(2, "0x%X: %s block\n" % (self.offset, adj))
- self.offset += BLOCKSIZE
+
+ if not name and not fileobj:
+ raise ValueError("nothing to open")
+
+ if mode in ("r", "r:*"):
+ # Find out which *open() is appropriate for opening the file.
+ for comptype in cls.OPEN_METH:
+ func = getattr(cls, cls.OPEN_METH[comptype])
+ if fileobj is not None:
+ saved_pos = fileobj.tell()
+ try:
+ return func(name, "r", fileobj, **kwargs)
+ except (ReadError, CompressionError), e:
+ if fileobj is not None:
+ fileobj.seek(saved_pos)
continue
- else:
- return None
- break
-
- # If the TarInfo instance contains a GNUTYPE longname or longlink
- # statement, we must process this first.
- if tarinfo.type in (GNUTYPE_LONGLINK, GNUTYPE_LONGNAME):
- tarinfo = self._proc_gnulong(tarinfo, tarinfo.type)
- if not tarinfo:
- return None
-
- if tarinfo.issparse():
- assert 0, "Sparse file support turned off"
- # Sparse files need some care,
- # due to the possible extra headers.
- tarinfo.offset = self.offset
- self.offset += BLOCKSIZE
- origsize = self._proc_sparse(tarinfo)
- tarinfo.offset_data = self.offset
- blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
- if remainder:
- blocks += 1
- self.offset += blocks * BLOCKSIZE
- tarinfo.size = origsize
+ raise ReadError("file could not be opened successfully")
+
+ elif ":" in mode:
+ filemode, comptype = mode.split(":", 1)
+ filemode = filemode or "r"
+ comptype = comptype or "tar"
+
+ # Select the *open() function according to
+ # given compression.
+ if comptype in cls.OPEN_METH:
+ func = getattr(cls, cls.OPEN_METH[comptype])
+ else:
+ raise CompressionError("unknown compression type %r" % comptype)
+ return func(name, filemode, fileobj, **kwargs)
+
+ elif "|" in mode:
+ filemode, comptype = mode.split("|", 1)
+ filemode = filemode or "r"
+ comptype = comptype or "tar"
+
+ if filemode not in "rw":
+ raise ValueError("mode must be 'r' or 'w'")
+
+ t = cls(name, filemode,
+ _Stream(name, filemode, comptype, fileobj, bufsize),
+ **kwargs)
+ t._extfileobj = False
+ return t
+
+ elif mode in "aw":
+ return cls.taropen(name, mode, fileobj, **kwargs)
+
+ raise ValueError("undiscernible mode")
+
+ @classmethod
+ def taropen(cls, name, mode="r", fileobj=None, **kwargs):
+ """Open uncompressed tar archive name for reading or writing.
+ """
+ if len(mode) > 1 or mode not in "raw":
+ raise ValueError("mode must be 'r', 'a' or 'w'")
+ return cls(name, mode, fileobj, **kwargs)
+
+ @classmethod
+ def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
+ """Open gzip compressed tar archive name for reading or writing.
+ Appending is not allowed.
+ """
+ if len(mode) > 1 or mode not in "rw":
+ raise ValueError("mode must be 'r' or 'w'")
+
+ try:
+ import gzip
+ gzip.GzipFile
+ except (ImportError, AttributeError):
+ raise CompressionError("gzip module is not available")
+
+ if fileobj is None:
+ fileobj = bltn_open(name, mode + "b")
+
+ try:
+ t = cls.taropen(name, mode,
+ gzip.GzipFile(name, mode, compresslevel, fileobj),
+ **kwargs)
+ except IOError:
+ raise ReadError("not a gzip file")
+ t._extfileobj = False
+ return t
+
+ @classmethod
+ def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
+ """Open bzip2 compressed tar archive name for reading or writing.
+ Appending is not allowed.
+ """
+ if len(mode) > 1 or mode not in "rw":
+ raise ValueError("mode must be 'r' or 'w'.")
+
+ try:
+ import bz2
+ except ImportError:
+ raise CompressionError("bz2 module is not available")
+
+ if fileobj is not None:
+ fileobj = _BZ2Proxy(fileobj, mode)
else:
- tarinfo.offset = self.offset
- self.offset += BLOCKSIZE
- tarinfo.offset_data = self.offset
- if tarinfo.isreg():
- ## Skip the following data blocks.
- blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
- if remainder:
- blocks += 1
- self.next_chunk = self.offset + (blocks * BLOCKSIZE)
- else: self.next_chunk = self.offset
-
- #self.members.append(tarinfo) These use too much memory
- #self.membernames.append(tarinfo.name)
- #self.chunks.append(self.offset)
- return tarinfo
+ fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
+
+ try:
+ t = cls.taropen(name, mode, fileobj, **kwargs)
+ except (IOError, EOFError):
+ raise ReadError("not a bzip2 file")
+ t._extfileobj = False
+ return t
+
+ # All *open() methods are registered here.
+ OPEN_METH = {
+ "tar": "taropen", # uncompressed tar
+ "gz": "gzopen", # gzip compressed tar
+ "bz2": "bz2open" # bzip2 compressed tar
+ }
+
+ #--------------------------------------------------------------------------
+ # The public methods which TarFile provides:
+
+ def close(self):
+ """Close the TarFile. In write-mode, two finishing zero blocks are
+ appended to the archive.
+ """
+ if self.closed:
+ return
+
+ if self.mode in "aw":
+ self.fileobj.write(NUL * (BLOCKSIZE * 2))
+ self.offset += (BLOCKSIZE * 2)
+ # fill up the end with zero-blocks
+ # (like option -b20 for tar does)
+ blocks, remainder = divmod(self.offset, RECORDSIZE)
+ if remainder > 0:
+ self.fileobj.write(NUL * (RECORDSIZE - remainder))
+
+ if not self._extfileobj:
+ self.fileobj.close()
+ self.closed = True
def getmember(self, name):
- """Return a TarInfo instance for member name.
- """
- if name not in self.membernames and not self._loaded:
- self._load()
- if name not in self.membernames:
- raise KeyError, "filename `%s' not found in tar archive" % name
- return self._getmember(name)
-
- def getinfo(self, name):
- """Return a TarInfo instance for member name.
- This method will be deprecated in 0.6,
- use getmember() instead.
- """
- # XXX kick this out in 0.6
- import warnings
- warnings.warn("use getmember() instead", DeprecationWarning)
- return self.getmember(name)
+ """Return a TarInfo object for member `name'. If `name' can not be
+ found in the archive, KeyError is raised. If a member occurs more
+ than once in the archive, its last occurrence is assumed to be the
+ most up-to-date version.
+ """
+ tarinfo = self._getmember(name)
+ if tarinfo is None:
+ raise KeyError("filename %r not found" % name)
+ return tarinfo
def getmembers(self):
- """Return a list of all members in the archive
- (as TarInfo instances).
+ """Return the members of the archive as a list of TarInfo objects. The
+ list has the same order as the members in the archive.
"""
+ self._check()
if not self._loaded: # if we want to obtain a list of
self._load() # all members, we first have to
# scan the whole archive.
return self.members
def getnames(self):
- """Return a list of names of all members in the
- archive.
- """
- if not self._loaded:
- self._load()
- return self.membernames
-
- def gettarinfo(self, name, arcname=None):
- """Create a TarInfo instance from an existing file.
- Optional arcname defines the name under which the file
- shall be stored in the archive.
- """
- # Now, fill the TarInfo instance with
+ """Return the members of the archive as a list of their names. It has
+ the same order as the list returned by getmembers().
+ """
+ return [tarinfo.name for tarinfo in self.getmembers()]
+
+ def gettarinfo(self, name=None, arcname=None, fileobj=None):
+ """Create a TarInfo object for either the file `name' or the file
+ object `fileobj' (using os.fstat on its file descriptor). You can
+ modify some of the TarInfo's attributes before you add it using
+ addfile(). If given, `arcname' specifies an alternative name for the
+ file in the archive.
+ """
+ self._check("aw")
+
+ # When fileobj is given, replace name by
+ # fileobj's real name.
+ if fileobj is not None:
+ name = fileobj.name
+
+ # Building the name of the member in the archive.
+ # Backward slashes are converted to forward slashes,
+ # Absolute paths are turned to relative paths.
+ if arcname is None:
+ arcname = name
+ drv, arcname = os.path.splitdrive(arcname)
+ arcname = arcname.replace(os.sep, "/")
+ arcname = arcname.lstrip("/")
+
+ # Now, fill the TarInfo object with
# information specific for the file.
- tarinfo = TarInfo()
-
- if arcname is None: tarinfo.set_arcname(name)
- else: tarinfo.set_arcname(arcname)
+ tarinfo = self.tarinfo()
+ tarinfo.tarfile = self
# Use os.stat or os.lstat, depending on platform
# and if symlinks shall be resolved.
- if hasattr(os, "lstat") and not self.dereference:
- statres = os.lstat(name)
+ if fileobj is None:
+ if hasattr(os, "lstat") and not self.dereference:
+ statres = os.lstat(name)
+ else:
+ statres = os.stat(name)
else:
- statres = os.stat(name)
-
- if not tarinfo.init_from_stat(statres): return None
-
- if tarinfo.type == REGTYPE:
- inode = (statres.st_ino, statres.st_dev, statres.st_mtime)
- if inode in self.inodes.keys() and not self.dereference:
+ statres = os.fstat(fileobj.fileno())
+ linkname = ""
+
+ stmd = statres.st_mode
+ if stat.S_ISREG(stmd):
+ inode = (statres.st_ino, statres.st_dev)
+ if not self.dereference and statres.st_nlink > 1 and \
+ inode in self.inodes and arcname != self.inodes[inode]:
# Is it a hardlink to an already
# archived file?
- tarinfo.type = LNKTYPE
- tarinfo.linkname = self.inodes[inode]
+ type = LNKTYPE
+ linkname = self.inodes[inode]
else:
# The inode is added only if its valid.
# For win32 it is always 0.
- if inode[0]: self.inodes[inode] = tarinfo.name
- elif tarinfo.type == SYMTYPE:
- tarinfo.linkname = os.readlink(name)
- tarinfo.size = 0
-
+ type = REGTYPE
+ if inode[0]:
+ self.inodes[inode] = arcname
+ elif stat.S_ISDIR(stmd):
+ type = DIRTYPE
+ elif stat.S_ISFIFO(stmd):
+ type = FIFOTYPE
+ elif stat.S_ISLNK(stmd):
+ type = SYMTYPE
+ linkname = os.readlink(name)
+ elif stat.S_ISCHR(stmd):
+ type = CHRTYPE
+ elif stat.S_ISBLK(stmd):
+ type = BLKTYPE
+ else:
+ return None
+
+ # Fill the TarInfo object with all
+ # information we can get.
+ tarinfo.name = arcname
+ tarinfo.mode = stmd
+ tarinfo.uid = statres.st_uid
+ tarinfo.gid = statres.st_gid
+ if type == REGTYPE:
+ tarinfo.size = statres.st_size
+ else:
+ tarinfo.size = 0L
+ tarinfo.mtime = statres.st_mtime
+ tarinfo.type = type
+ tarinfo.linkname = linkname
+ if pwd:
+ try:
+ tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
+ except KeyError:
+ pass
+ if grp:
+ try:
+ tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
+ except KeyError:
+ pass
+
+ if type in (CHRTYPE, BLKTYPE):
+ if hasattr(os, "major") and hasattr(os, "minor"):
+ tarinfo.devmajor = os.major(statres.st_rdev)
+ tarinfo.devminor = os.minor(statres.st_rdev)
return tarinfo
- def list(self, verbose=1):
- """Print a formatted listing of the archive's
- contents to stdout.
+ def list(self, verbose=True):
+ """Print a table of contents to sys.stdout. If `verbose' is False, only
+ the names of the members are printed. If it is True, an `ls -l'-like
+ output is produced.
"""
+ self._check()
+
for tarinfo in self:
if verbose:
print filemode(tarinfo.mode),
- print tarinfo.uname + "/" + tarinfo.gname,
+ print "%s/%s" % (tarinfo.uname or tarinfo.uid,
+ tarinfo.gname or tarinfo.gid),
if tarinfo.ischr() or tarinfo.isblk():
- print "%10s" % (str(tarinfo.devmajor) + "," + str(tarinfo.devminor)),
+ print "%10s" % ("%d,%d" \
+ % (tarinfo.devmajor, tarinfo.devminor)),
else:
print "%10d" % tarinfo.size,
print "%d-%02d-%02d %02d:%02d:%02d" \
- % time.gmtime(tarinfo.mtime)[:6],
+ % time.localtime(tarinfo.mtime)[:6],
- print tarinfo.name,
+ if tarinfo.isdir():
+ print tarinfo.name + "/",
+ else:
+ print tarinfo.name,
if verbose:
if tarinfo.issym():
@@ -693,254 +1943,290 @@
print "link to", tarinfo.linkname,
print
- def add(self, name, arcname=None, recursive=1):
- """Add a file or a directory to the archive.
- Directory addition is recursive by default.
+ def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
+ """Add the file `name' to the archive. `name' may be any type of file
+ (directory, fifo, symbolic link, etc.). If given, `arcname'
+ specifies an alternative name for the file in the archive.
+ Directories are added recursively by default. This can be avoided by
+ setting `recursive' to False. `exclude' is a function that should
+ return True for each filename to be excluded. `filter' is a function
+ that expects a TarInfo object argument and returns the changed
+ TarInfo object, if it returns None the TarInfo object will be
+ excluded from the archive.
"""
- if not self.fileobj:
- raise ValueError, "I/O operation on closed file"
- if self._mode == "r":
- raise ValueError, "writing to a read-mode file"
+ self._check("aw")
if arcname is None:
arcname = name
+ # Exclude pathnames.
+ if exclude is not None:
+ import warnings
+ warnings.warn("use the filter argument instead",
+ DeprecationWarning, 2)
+ if exclude(name):
+ self._dbg(2, "tarfile: Excluded %r" % name)
+ return
+
# Skip if somebody tries to archive the archive...
- if os.path.abspath(name) == os.path.abspath(self.name):
- self._dbg(2, "tarfile: Skipped `%s'\n" % name)
- return
-
- # Special case: The user wants to add the current
- # working directory.
- if name == ".":
- if recursive:
- if arcname == ".":
- arcname = ""
- for f in os.listdir("."):
- self.add(f, os.path.join(arcname, f))
- return
-
- self._dbg(1, "%s\n" % name)
-
- # Create a TarInfo instance from the file.
+ if self.name is not None and os.path.abspath(name) == self.name:
+ self._dbg(2, "tarfile: Skipped %r" % name)
+ return
+
+ self._dbg(1, name)
+
+ # Create a TarInfo object from the file.
tarinfo = self.gettarinfo(name, arcname)
if tarinfo is None:
- self._dbg(1, "tarfile: Unsupported type `%s'\n" % name)
+ self._dbg(1, "tarfile: Unsupported type %r" % name)
+ return
+ # Change or exclude the TarInfo object.
+ if filter is not None:
+ tarinfo = filter(tarinfo)
+ if tarinfo is None:
+ self._dbg(2, "tarfile: Excluded %r" % name)
+ return
# Append the tar header and data to the archive.
if tarinfo.isreg():
- f = __builtin__.file(name, "rb")
- self.addfile(tarinfo, fileobj = f)
+ f = bltn_open(name, "rb")
+ self.addfile(tarinfo, f)
f.close()
- if tarinfo.type in (LNKTYPE, SYMTYPE, FIFOTYPE, CHRTYPE, BLKTYPE):
- tarinfo.size = 0l
- self.addfile(tarinfo)
-
- if tarinfo.isdir():
+ elif tarinfo.isdir():
self.addfile(tarinfo)
if recursive:
for f in os.listdir(name):
- self.add(os.path.join(name, f), os.path.join(arcname, f))
+ self.add(os.path.join(name, f), os.path.join(arcname, f),
+ recursive, exclude, filter)
+
+ else:
+ self.addfile(tarinfo)
def addfile(self, tarinfo, fileobj=None):
- """Add the content of fileobj to the tarfile.
- The amount of bytes to read is determined by
- the size attribute in the tarinfo instance.
+ """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
+ given, tarinfo.size bytes are read from it and added to the archive.
+ You can create TarInfo objects using gettarinfo().
+ On Windows platforms, `fileobj' should always be opened with mode
+ 'rb' to avoid irritation about the file size.
"""
- if not self.fileobj:
- raise ValueError, "I/O operation on closed file"
- if self._mode == "r":
- raise ValueError, "writing to a read-mode file"
-
- # XXX What was this good for again?
- #try:
- # self.fileobj.seek(self.chunks[-1])
- #except IOError:
- # pass
-
- full_headers = self._get_full_headers(tarinfo)
- self.fileobj.write(full_headers)
- assert len(full_headers) % BLOCKSIZE == 0
- self.offset += len(full_headers)
+ self._check("aw")
+
+ tarinfo = copy.copy(tarinfo)
+
+ buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
+ self.fileobj.write(buf)
+ self.offset += len(buf)
# If there's data to follow, append it.
if fileobj is not None:
copyfileobj(fileobj, self.fileobj, tarinfo.size)
blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
if remainder > 0:
- self.fileobj.write("\0" * (BLOCKSIZE - remainder))
+ self.fileobj.write(NUL * (BLOCKSIZE - remainder))
blocks += 1
self.offset += blocks * BLOCKSIZE
- #self.members.append(tarinfo) #These take up too much memory
- #self.membernames.append(tarinfo.name)
- #self.chunks.append(self.offset)
-
- def _get_full_headers(self, tarinfo):
- """Return string containing headers around tarinfo, including gnulongs
- """
- buf = ""
- # Now we must check if the strings for filename
- # and linkname fit into the posix header.
- # (99 chars + "\0" for each)
- # If not, we must create GNU extension headers.
- # If both filename and linkname are too long,
- # the longlink is first to be written out.
- if len(tarinfo.linkname) >= LENGTH_LINK - 1:
- buf += self._return_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
- tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
- if len(tarinfo.name) >= LENGTH_NAME - 1:
- buf += self._return_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
- tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
- return buf + tarinfo.getheader()
-
-# def untar(self, path):
-# """Untar the whole archive to path.
-# """
-# later = []
-# for tarinfo in self:
-# if tarinfo.isdir():
-# later.append(tarinfo)
-# self.extract(tarinfo, path)
-# for tarinfo in later:
-# self._utime(tarinfo, os.path.join(path, tarinfo.name))
-
- def extractfile(self, member):
- """Extract member from the archive and return a file-like
- object. member may be a name or a TarInfo instance.
- """
- if not self.fileobj:
- raise ValueError, "I/O operation on closed file"
- if self._mode != "r":
- raise ValueError, "reading from a write-mode file"
-
- if isinstance(member, TarInfo):
- tarinfo = member
- else:
- tarinfo = self.getmember(member)
-
- if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
- return _FileObject(self, tarinfo)
- elif tarinfo.islnk() or tarinfo.issym():
- return self.extractfile(self._getmember(tarinfo.linkname, tarinfo))
- else:
- return None
+ self.members.append(tarinfo)
+
+ def extractall(self, path=".", members=None):
+ """Extract all members from the archive to the current working
+ directory and set owner, modification time and permissions on
+ directories afterwards. `path' specifies a different directory
+ to extract to. `members' is optional and must be a subset of the
+ list returned by getmembers().
+ """
+ directories = []
+
+ if members is None:
+ members = self
+
+ for tarinfo in members:
+ if tarinfo.isdir():
+ # Extract directories with a safe mode.
+ directories.append(tarinfo)
+ tarinfo = copy.copy(tarinfo)
+ tarinfo.mode = 0700
+ self.extract(tarinfo, path)
+
+ # Reverse sort directories.
+ directories.sort(key=operator.attrgetter('name'))
+ directories.reverse()
+
+ # Set correct owner, mtime and filemode on directories.
+ for tarinfo in directories:
+ dirpath = os.path.join(path, tarinfo.name)
+ try:
+ self.chown(tarinfo, dirpath)
+ self.utime(tarinfo, dirpath)
+ self.chmod(tarinfo, dirpath)
+ except ExtractError, e:
+ if self.errorlevel > 1:
+ raise
+ else:
+ self._dbg(1, "tarfile: %s" % e)
def extract(self, member, path=""):
- """Extract member from the archive and write it to
- current working directory using its full pathname.
- If optional path is given, it is attached before the
- pathname.
- member may be a name or a TarInfo instance.
+ """Extract a member from the archive to the current working directory,
+ using its full name. Its file information is extracted as accurately
+ as possible. `member' may be a filename or a TarInfo object. You can
+ specify a different directory using `path'.
"""
- if not self.fileobj:
- raise ValueError, "I/O operation on closed file"
- if self._mode != "r":
- raise ValueError, "reading from a write-mode file"
+ self._check("r")
- if isinstance(member, TarInfo):
+ if isinstance(member, basestring):
+ tarinfo = self.getmember(member)
+ else:
tarinfo = member
- else:
- tarinfo = self.getmember(member)
-
- self._dbg(1, tarinfo.name)
+
+ # Prepare the link target for makelink().
+ if tarinfo.islnk():
+ tarinfo._link_target = os.path.join(path, tarinfo.linkname)
+
try:
self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
except EnvironmentError, e:
if self.errorlevel > 0:
raise
else:
- self._dbg(1, "\ntarfile: %s `%s'" % (e.strerror, e.filename))
- except TarError, e:
+ if e.filename is None:
+ self._dbg(1, "tarfile: %s" % e.strerror)
+ else:
+ self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
+ except ExtractError, e:
if self.errorlevel > 1:
raise
else:
- self._dbg(1, "\ntarfile: %s" % e)
- self._dbg(1, "\n")
+ self._dbg(1, "tarfile: %s" % e)
+
+ def extractfile(self, member):
+ """Extract a member from the archive as a file object. `member' may be
+ a filename or a TarInfo object. If `member' is a regular file, a
+ file-like object is returned. If `member' is a link, a file-like
+ object is constructed from the link's target. If `member' is none of
+ the above, None is returned.
+ The file-like object is read-only and provides the following
+ methods: read(), readline(), readlines(), seek() and tell()
+ """
+ self._check("r")
+
+ if isinstance(member, basestring):
+ tarinfo = self.getmember(member)
+ else:
+ tarinfo = member
+
+ if tarinfo.isreg():
+ return self.fileobject(self, tarinfo)
+
+ elif tarinfo.type not in SUPPORTED_TYPES:
+ # If a member's type is unknown, it is treated as a
+ # regular file.
+ return self.fileobject(self, tarinfo)
+
+ elif tarinfo.islnk() or tarinfo.issym():
+ if isinstance(self.fileobj, _Stream):
+ # A small but ugly workaround for the case that someone tries
+ # to extract a (sym)link as a file-object from a non-seekable
+ # stream of tar blocks.
+ raise StreamError("cannot extract (sym)link as file object")
+ else:
+ # A (sym)link's file object is its target's file object.
+ return self.extractfile(self._find_link_target(tarinfo))
+ else:
+ # If there's no data associated with the member (directory, chrdev,
+ # blkdev, etc.), return None instead of a file object.
+ return None
def _extract_member(self, tarinfo, targetpath):
- """Extract the TarInfo instance tarinfo to a physical
+ """Extract the TarInfo object tarinfo to a physical
file called targetpath.
"""
- # Fetch the TarInfo instance for the given name
+ # Fetch the TarInfo object for the given name
# and build the destination pathname, replacing
# forward slashes to platform specific separators.
- if targetpath[-1:] == "/":
- targetpath = targetpath[:-1]
- targetpath = os.path.normpath(targetpath)
+ targetpath = targetpath.rstrip("/")
+ targetpath = targetpath.replace("/", os.sep)
# Create all upper directories.
upperdirs = os.path.dirname(targetpath)
if upperdirs and not os.path.exists(upperdirs):
- ti = TarInfo()
- ti.name = ""
- ti.type = DIRTYPE
- ti.mode = 0777
- ti.mtime = tarinfo.mtime
- ti.uid = tarinfo.uid
- ti.gid = tarinfo.gid
- ti.uname = tarinfo.uname
- ti.gname = tarinfo.gname
- for d in os.path.split(os.path.splitdrive(upperdirs)[1]):
- ti.name = os.path.join(ti.name, d)
- self._extract_member(ti, ti.name)
+ # Create directories that are not part of the archive with
+ # default permissions.
+ os.makedirs(upperdirs)
+
+ if tarinfo.islnk() or tarinfo.issym():
+ self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
+ else:
+ self._dbg(1, tarinfo.name)
if tarinfo.isreg():
- self._makefile(tarinfo, targetpath)
+ self.makefile(tarinfo, targetpath)
elif tarinfo.isdir():
- self._makedir(tarinfo, targetpath)
+ self.makedir(tarinfo, targetpath)
elif tarinfo.isfifo():
- self._makefifo(tarinfo, targetpath)
+ self.makefifo(tarinfo, targetpath)
elif tarinfo.ischr() or tarinfo.isblk():
- self._makedev(tarinfo, targetpath)
+ self.makedev(tarinfo, targetpath)
elif tarinfo.islnk() or tarinfo.issym():
- self._makelink(tarinfo, targetpath)
+ self.makelink(tarinfo, targetpath)
+ elif tarinfo.type not in SUPPORTED_TYPES:
+ self.makeunknown(tarinfo, targetpath)
else:
- self._makefile(tarinfo, targetpath)
- if tarinfo.type not in SUPPORTED_TYPES:
- self._dbg(1, "\ntarfile: Unknown file type '%s', " \
- "extracted as regular file." % tarinfo.type)
+ self.makefile(tarinfo, targetpath)
+ self.chown(tarinfo, targetpath)
if not tarinfo.issym():
- self._chown(tarinfo, targetpath)
- self._chmod(tarinfo, targetpath)
- if not tarinfo.isdir():
- self._utime(tarinfo, targetpath)
-
- def _makedir(self, tarinfo, targetpath):
- """Make a directory called targetpath out of tarinfo.
+ self.chmod(tarinfo, targetpath)
+ self.utime(tarinfo, targetpath)
+
+ #--------------------------------------------------------------------------
+ # Below are the different file methods. They are called via
+ # _extract_member() when extract() is called. They can be replaced in a
+ # subclass to implement other functionality.
+
+ def makedir(self, tarinfo, targetpath):
+ """Make a directory called targetpath.
"""
try:
- os.mkdir(targetpath)
+ # Use a safe mode for the directory, the real mode is set
+ # later in _extract_member().
+ os.mkdir(targetpath, 0700)
except EnvironmentError, e:
if e.errno != errno.EEXIST:
raise
- def _makefile(self, tarinfo, targetpath):
- """Make a file called targetpath out of tarinfo.
+ def makefile(self, tarinfo, targetpath):
+ """Make a file called targetpath.
"""
source = self.extractfile(tarinfo)
- target = __builtin__.file(targetpath, "wb")
+ target = bltn_open(targetpath, "wb")
copyfileobj(source, target)
source.close()
target.close()
- def _makefifo(self, tarinfo, targetpath):
- """Make a fifo called targetpath out of tarinfo.
+ def makeunknown(self, tarinfo, targetpath):
+ """Make a file from a TarInfo object with an unknown type
+ at targetpath.
+ """
+ self.makefile(tarinfo, targetpath)
+ self._dbg(1, "tarfile: Unknown file type %r, " \
+ "extracted as regular file." % tarinfo.type)
+
+ def makefifo(self, tarinfo, targetpath):
+ """Make a fifo called targetpath.
"""
if hasattr(os, "mkfifo"):
os.mkfifo(targetpath)
else:
- raise TarError, "Fifo not supported by system"
+ raise ExtractError("fifo not supported by system")
- def _makedev(self, tarinfo, targetpath):
- """Make a character or block device called targetpath out of tarinfo.
+ def makedev(self, tarinfo, targetpath):
+ """Make a character or block device called targetpath.
"""
- if not hasattr(os, "mknod"):
- raise TarError, "Special devices not supported by system"
+ if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
+ raise ExtractError("special devices not supported by system")
mode = tarinfo.mode
if tarinfo.isblk():
@@ -948,109 +2234,187 @@
else:
mode |= stat.S_IFCHR
- # This if statement should go away when python-2.3a0-devicemacros
- # patch succeeds.
- if hasattr(os, "makedev"):
- os.mknod(targetpath, mode,
- os.makedev(tarinfo.devmajor, tarinfo.devminor))
- else:
- os.mknod(targetpath, mode,
- tarinfo.devmajor, tarinfo.devminor)
+ os.mknod(targetpath, mode,
+ os.makedev(tarinfo.devmajor, tarinfo.devminor))
- def _makelink(self, tarinfo, targetpath):
- """Make a (symbolic) link called targetpath out of tarinfo.
- If it cannot be made (due to platform or failure), we try
- to make a copy of the referenced file instead of a link.
+ def makelink(self, tarinfo, targetpath):
+ """Make a (symbolic) link called targetpath. If it cannot be created
+ (platform limitation), we try to make a copy of the referenced file
+ instead of a link.
"""
- linkpath = tarinfo.linkname
- self._dbg(1, " -> %s" % linkpath)
- try:
+ if hasattr(os, "symlink") and hasattr(os, "link"):
+ # For systems that support symbolic and hard links.
if tarinfo.issym():
- os.symlink(linkpath, targetpath)
+ os.symlink(tarinfo.linkname, targetpath)
else:
- linkpath = os.path.join(os.path.dirname(targetpath),
- linkpath)
- os.link(linkpath, targetpath)
- except AttributeError:
- linkpath = os.path.join(os.path.dirname(tarinfo.name),
- tarinfo.linkname)
- linkpath = normpath(linkpath)
+ # See extract().
+ if os.path.exists(tarinfo._link_target):
+ os.link(tarinfo._link_target, targetpath)
+ else:
+ self._extract_member(self._find_link_target(tarinfo), targetpath)
+ else:
try:
- self._extract_member(self.getmember(linkpath), targetpath)
- except (IOError, OSError, KeyError), e: #@UnusedVariable
- linkpath = os.path.normpath(linkpath)
- try:
- shutil.copy2(linkpath, targetpath)
- except EnvironmentError, e: #@UnusedVariable
- raise TarError, "Link could not be created"
+ self._extract_member(self._find_link_target(tarinfo), targetpath)
+ except KeyError:
+ raise ExtractError("unable to resolve link inside archive")
- def _chown(self, tarinfo, targetpath):
+ def chown(self, tarinfo, targetpath):
"""Set owner of targetpath according to tarinfo.
"""
- if pwd and os.geteuid() == 0:
+ if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
# We have to be root to do so.
- try: g = gname2gid(tarinfo.gname)
- except KeyError:
- try:
- gid2gname(tarinfo.gid) # Make sure gid exists
- g = tarinfo.gid
- except KeyError: g = os.getgid()
- try: u = uname2uid(tarinfo.uname)
- except KeyError:
- try:
- uid2uname(tarinfo.uid) # Make sure uid exists
- u = tarinfo.uid
- except KeyError: u = os.getuid()
+ try:
+ g = grp.getgrnam(tarinfo.gname)[2]
+ except KeyError:
+ try:
+ g = grp.getgrgid(tarinfo.gid)[2]
+ except KeyError:
+ g = os.getgid()
+ try:
+ u = pwd.getpwnam(tarinfo.uname)[2]
+ except KeyError:
+ try:
+ u = pwd.getpwuid(tarinfo.uid)[2]
+ except KeyError:
+ u = os.getuid()
try:
if tarinfo.issym() and hasattr(os, "lchown"):
os.lchown(targetpath, u, g)
else:
- os.chown(targetpath, u, g)
+ if sys.platform != "os2emx":
+ os.chown(targetpath, u, g)
except EnvironmentError, e:
- self._dbg(2, "\ntarfile: (chown failed), %s `%s'"
- % (e.strerror, e.filename))
+ raise ExtractError("could not change owner")
- def _chmod(self, tarinfo, targetpath):
+ def chmod(self, tarinfo, targetpath):
"""Set file permissions of targetpath according to tarinfo.
"""
- try:
- os.chmod(targetpath, tarinfo.mode)
- except EnvironmentError, e:
- self._dbg(2, "\ntarfile: (chmod failed), %s `%s'"
- % (e.strerror, e.filename))
+ if hasattr(os, 'chmod'):
+ try:
+ os.chmod(targetpath, tarinfo.mode)
+ except EnvironmentError, e:
+ raise ExtractError("could not change mode")
- def _utime(self, tarinfo, targetpath):
+ def utime(self, tarinfo, targetpath):
"""Set modification time of targetpath according to tarinfo.
"""
+ if not hasattr(os, 'utime'):
+ return
try:
os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
except EnvironmentError, e:
- self._dbg(2, "\ntarfile: (utime failed), %s `%s'"
- % (e.strerror, e.filename))
-
- def _getmember(self, name, tarinfo=None):
+ raise ExtractError("could not change modification time")
+
+ #--------------------------------------------------------------------------
+ def next(self):
+ """Return the next member of the archive as a TarInfo object, when
+ TarFile is opened for reading. Return None if there is no more
+ available.
+ """
+ self._check("ra")
+ if self.firstmember is not None:
+ m = self.firstmember
+ self.firstmember = None
+ return m
+
+ # Read the next block.
+ self.fileobj.seek(self.offset)
+ tarinfo = None
+ while True:
+ try:
+ tarinfo = self.tarinfo.fromtarfile(self)
+ except EOFHeaderError, e:
+ if self.ignore_zeros:
+ self._dbg(2, "0x%X: %s" % (self.offset, e))
+ self.offset += BLOCKSIZE
+ continue
+ except InvalidHeaderError, e:
+ if self.ignore_zeros:
+ self._dbg(2, "0x%X: %s" % (self.offset, e))
+ self.offset += BLOCKSIZE
+ continue
+ elif self.offset == 0:
+ raise ReadError(str(e))
+ except EmptyHeaderError:
+ if self.offset == 0:
+ raise ReadError("empty file")
+ except TruncatedHeaderError, e:
+ if self.offset == 0:
+ raise ReadError(str(e))
+ except SubsequentHeaderError, e:
+ raise ReadError(str(e))
+ break
+
+ if tarinfo is not None:
+ self.members.append(tarinfo)
+ else:
+ self._loaded = True
+
+ return tarinfo
+
+ #--------------------------------------------------------------------------
+ # Little helper methods:
+
+ def _getmember(self, name, tarinfo=None, normalize=False):
"""Find an archive member by name from bottom to top.
If tarinfo is given, it is used as the starting point.
"""
- if tarinfo is None:
- end = len(self.members)
- else:
- end = self.members.index(tarinfo)
-
- for i in xrange(end - 1, -1, -1):
- if name == self.membernames[i]:
- return self.members[i]
+ # Ensure that all members have been loaded.
+ members = self.getmembers()
+
+ # Limit the member search list up to tarinfo.
+ if tarinfo is not None:
+ members = members[:members.index(tarinfo)]
+
+ if normalize:
+ name = os.path.normpath(name)
+
+ for member in reversed(members):
+ if normalize:
+ member_name = os.path.normpath(member.name)
+ else:
+ member_name = member.name
+
+ if name == member_name:
+ return member
def _load(self):
"""Read through the entire archive file and look for readable
members.
"""
- while 1:
+ while True:
tarinfo = self.next()
if tarinfo is None:
break
- self._loaded = 1
- return
+ self._loaded = True
+
+ def _check(self, mode=None):
+ """Check if TarFile is still open, and if the operation's mode
+ corresponds to TarFile's mode.
+ """
+ if self.closed:
+ raise IOError("%s is closed" % self.__class__.__name__)
+ if mode is not None and self.mode not in mode:
+ raise IOError("bad operation for mode %r" % self.mode)
+
+ def _find_link_target(self, tarinfo):
+ """Find the target member of a symlink or hardlink member in the
+ archive.
+ """
+ if tarinfo.issym():
+ # Always search the entire archive.
+ linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname
+ limit = None
+ else:
+ # Search the archive before the link, because a hard link is
+ # just a reference to an already archived file.
+ linkname = tarinfo.linkname
+ limit = tarinfo
+
+ member = self._getmember(linkname, tarinfo=limit, normalize=True)
+ if member is None:
+ raise KeyError("linkname %r not found" % linkname)
+ return member
def __iter__(self):
"""Provide an iterator object.
@@ -1060,136 +2424,25 @@
else:
return TarIter(self)
- def _buftoinfo(self, buf):
- """Transform a 512 byte block to a TarInfo instance.
- """
- tarinfo = TarInfo()
- tarinfo.name = nts(buf[0:100])
- tarinfo.mode = int(buf[100:107], 8)
- tarinfo.uid = int(buf[108:115],8)
- tarinfo.gid = int(buf[116:123],8)
- tarinfo.size = long(buf[124:135], 8)
- tarinfo.mtime = long(buf[136:147], 8)
- # chksum stored as a six digit octal number with
- # leading zeroes followed by a nul and then a space
- tarinfo.chksum = int(buf[148:154], 8)
- tarinfo.type = buf[156:157]
- tarinfo.linkname = nts(buf[157:257])
- tarinfo.uname = nts(buf[265:297])
- tarinfo.gname = nts(buf[297:329])
- try:
- tarinfo.devmajor = int(buf[329:336], 8)
- tarinfo.devminor = int(buf[337:344], 8)
- except ValueError:
- tarinfo.devmajor = tarinfo.devmajor = 0
- tarinfo.prefix = buf[345:500]
- if tarinfo.chksum != calc_chksum(buf):
- self._dbg(1, "tarfile: Bad Checksum\n")
- return tarinfo
-
- def _proc_gnulong(self, tarinfo, type):
- """Evaluate the blocks that hold a GNU longname
- or longlink member.
- """
- name = None
- linkname = None
- #may be some sanity checking should be done here
- #assert tarinfo.size < 1000 * BLOCKSIZE, "Filename appears to be too long!"
- buf = self.fileobj.read(BLOCKSIZE)
- if not buf: return None
- namesize = tarinfo.size - BLOCKSIZE
- self.offset += BLOCKSIZE
- # may be the whole name should be read with one operation?
- while namesize > 0:
- buf += self.fileobj.read(BLOCKSIZE)
- if not buf: return None
- self.offset += BLOCKSIZE
- namesize -= BLOCKSIZE
- if type == GNUTYPE_LONGNAME: name = nts(buf)
- if type == GNUTYPE_LONGLINK: linkname = nts(buf)
-
- buf = self.fileobj.read(BLOCKSIZE)
- if not buf: return None
- tarinfo = self._buftoinfo(buf)
- if tarinfo.type in (GNUTYPE_LONGLINK, GNUTYPE_LONGNAME):
- tarinfo = self._proc_gnulong(tarinfo, tarinfo.type)
- if not tarinfo: return None
- if name is not None:
- tarinfo.name = name
- if linkname is not None:
- tarinfo.linkname = linkname
- self.offset += BLOCKSIZE
- return tarinfo
-
-
-
- def _return_gnulong(self, name, type):
- """Insert a GNU longname/longlink member into the archive.
- It consists of a common tar header, with the length
- of the longname as size, followed by a data block,
- which contains the longname as a null terminated string.
- """
- tarinfo = TarInfo()
- tarinfo.name = "././@LongLink"
- tarinfo.type = type
- tarinfo.mode = 0
- tarinfo.size = len(name)
-
- residual = (tarinfo.size % BLOCKSIZE)
- return "%s%s%s" % (tarinfo.getheader(), name,
- "\0" * ((BLOCKSIZE - residual) * (residual > 0)))
-
- def _proc_sparse(self, tarinfo):
- """Analyze a GNU sparse header plus extra headers.
- """
- buf = tarinfo.getheader()
- sp = _ringbuffer()
- pos = 386
- lastpos = 0l
- realpos = 0l
- try:
- # There are 4 possible sparse structs in the
- # first header.
- for i in range(4): #@UnusedVariable
- offset = int(buf[pos:pos + 12], 8)
- numbytes = int(buf[pos + 12:pos + 24], 8)
- if offset > lastpos:
- sp.append(_hole(lastpos, offset - lastpos))
- sp.append(_data(offset, numbytes, realpos))
- realpos += numbytes
- lastpos = offset + numbytes
- pos += 24
-
- isextended = ord(buf[482])
- origsize = int(buf[483:495], 8)
-
- # If the isextended flag is given,
- # there are extra headers to process.
- while isextended == 1:
- buf = self.fileobj.read(BLOCKSIZE)
- self.offset += BLOCKSIZE
- pos = 0
- for i in range(21): #@UnusedVariable
- offset = int(buf[pos:pos + 12], 8)
- numbytes = int(buf[pos + 12:pos + 24], 8)
- if offset > lastpos:
- sp.append(_hole(lastpos, offset - lastpos))
- sp.append(_data(offset, numbytes, realpos))
- realpos += numbytes
- lastpos = offset + numbytes
- pos += 24
- isextended = ord(buf[504])
- except ValueError:
- pass
- if lastpos < origsize:
- sp.append(_hole(lastpos, origsize - lastpos))
-
- tarinfo.sparse = sp
- return origsize
-
def _dbg(self, level, msg):
+ """Write debugging output to sys.stderr.
+ """
if level <= self.debug:
- sys.stdout.write(msg)
+ print >> sys.stderr, msg
+
+ def __enter__(self):
+ self._check()
+ return self
+
+ def __exit__(self, type, value, traceback):
+ if type is None:
+ self.close()
+ else:
+ # An exception occurred. We must not call close() because
+ # it would try to write end-of-archive blocks and padding.
+ if not self._extfileobj:
+ self.fileobj.close()
+ self.closed = True
# class TarFile
class TarIter:
@@ -1200,9 +2453,10 @@
"""
def __init__(self, tarfile):
- """Construct a TarIter instance.
+ """Construct a TarIter object.
"""
self.tarfile = tarfile
+ self.index = 0
def __iter__(self):
"""Return iterator object.
"""
@@ -1211,12 +2465,21 @@
"""Return the next item using TarFile's next() method.
When all members have been read, set TarFile as _loaded.
"""
- tarinfo = self.tarfile.next()
- if not tarinfo:
- self.tarfile._loaded = 1
- raise StopIteration
+ # Fix for SF #1100429: Under rare circumstances it can
+ # happen that getmembers() is called during iteration,
+ # which will cause TarIter to stop prematurely.
+ if not self.tarfile._loaded:
+ tarinfo = self.tarfile.next()
+ if not tarinfo:
+ self.tarfile._loaded = True
+ raise StopIteration
+ else:
+ try:
+ tarinfo = self.tarfile.members[self.index]
+ except IndexError:
+ raise StopIteration
+ self.index += 1
return tarinfo
-# class TarIter
# Helper classes for sparse file support
class _section:
@@ -1248,7 +2511,7 @@
self.idx = 0
def find(self, offset):
idx = self.idx
- while 1:
+ while True:
item = self[idx]
if offset in item:
break
@@ -1261,153 +2524,9 @@
self.idx = idx
return item
-class _FileObject:
- """File-like object for reading an archive member,
- is returned by TarFile.extractfile().
- Support for sparse files included.
- """
-
- def __init__(self, tarfile, tarinfo):
- self.tarfile = tarfile
- self.fileobj = tarfile.fileobj
- self.name = tarinfo.name
- self.mode = "r"
- self.closed = 0
- self.offset = tarinfo.offset_data
- self.size = tarinfo.size
- self.pos = 0l
- self.linebuffer = ""
- if tarinfo.issparse():
- self.sparse = tarinfo.sparse
- self.read = self._readsparse
- else:
- self.read = self._readnormal
-
- def readline(self, size=-1):
- """Read a line with approx. size.
- If size is negative, read a whole line.
- readline() and read() must not be mixed up (!).
- """
- if size < 0:
- size = sys.maxint
-
- nl = self.linebuffer.find("\n")
- if nl >= 0:
- nl = min(nl, size)
- else:
- size -= len(self.linebuffer)
- while nl < 0:
- buf = self.read(min(size, 100))
- if not buf:
- break
- self.linebuffer += buf
- size -= len(buf)
- if size <= 0:
- break
- nl = self.linebuffer.find("\n")
- if nl == -1:
- s = self.linebuffer
- self.linebuffer = ""
- return s
- buf = self.linebuffer[:nl]
- self.linebuffer = self.linebuffer[nl + 1:]
- while buf[-1:] == "\r":
- buf = buf[:-1]
- return buf + "\n"
-
- def readlines(self):
- """Return a list with all (following) lines.
- """
- result = []
- while 1:
- line = self.readline()
- if not line: break
- result.append(line)
- return result
-
- def _readnormal(self, size=None):
- """Read operation for regular files.
- """
- if self.closed:
- raise ValueError, "I/O operation on closed file"
- #self.fileobj.seek(self.offset + self.pos)
- bytesleft = self.size - self.pos
- if size is None:
- bytestoread = bytesleft
- else:
- bytestoread = min(size, bytesleft)
- self.pos += bytestoread
- self.tarfile.offset += bytestoread
- return self.fileobj.read(bytestoread)
-
- def _readsparse(self, size=None):
- """Read operation for sparse files.
- """
- if self.closed:
- raise ValueError, "I/O operation on closed file"
-
- if size is None:
- size = self.size - self.pos
-
- data = ""
- while size > 0:
- buf = self._readsparsesection(size)
- if not buf:
- break
- size -= len(buf)
- data += buf
- return data
-
- def _readsparsesection(self, size):
- """Read a single section of a sparse file.
- """
- section = self.sparse.find(self.pos)
-
- if section is None:
- return ""
-
- toread = min(size, section.offset + section.size - self.pos)
- if isinstance(section, _data):
- realpos = section.realpos + self.pos - section.offset
- self.pos += toread
- self.fileobj.seek(self.offset + realpos)
- return self.fileobj.read(toread)
- else:
- self.pos += toread
- return "\0" * toread
-
- def tell(self):
- """Return the current file position.
- """
- return self.pos
-
- def seek(self, pos, whence=0):
- """Seek to a position in the file.
- """
- self.linebuffer = ""
- if whence == 0:
- self.pos = min(max(pos, 0), self.size)
- if whence == 1:
- if pos < 0:
- self.pos = max(self.pos + pos, 0)
- else:
- self.pos = min(self.pos + pos, self.size)
- if whence == 2:
- self.pos = max(min(self.size + pos, self.size), 0)
-
- def close(self):
- """Close the file object.
- """
- self.closed = 1
-#class _FileObject
-
#---------------------------------------------
# zipfile compatible TarFile class
-#
-# for details consult zipfile's documentation
#---------------------------------------------
-import cStringIO
-
TAR_PLAIN = 0 # zipfile.ZIP_STORED
TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED
class TarFileCompat:
@@ -1415,16 +2534,18 @@
ZipFile class.
"""
def __init__(self, file, mode="r", compression=TAR_PLAIN):
+ from warnings import warnpy3k
+ warnpy3k("the TarFileCompat class has been removed in Python 3.0",
+ stacklevel=2)
if compression == TAR_PLAIN:
- self.tarfile = open(file, mode)
+ self.tarfile = TarFile.taropen(file, mode)
elif compression == TAR_GZIPPED:
- self.tarfile = gzopen(file, mode)
+ self.tarfile = TarFile.gzopen(file, mode)
else:
- raise ValueError, "unknown compression constant"
+ raise ValueError("unknown compression constant")
if mode[0:1] == "r":
members = self.tarfile.getmembers()
- for i in range(len(members)):
- m = members[i]
+ for m in members:
m.filename = m.name
m.file_size = m.size
m.date_time = time.gmtime(m.mtime)[:6]
@@ -1444,270 +2565,32 @@
def write(self, filename, arcname=None, compress_type=None):
self.tarfile.add(filename, arcname)
def writestr(self, zinfo, bytes):
+ try:
+ from cStringIO import StringIO
+ except ImportError:
+ from StringIO import StringIO
import calendar
- zinfo.name = zinfo.filename
- zinfo.size = zinfo.file_size
- zinfo.mtime = calendar.timegm(zinfo.date_time)
- self.tarfile.addfile(zinfo, cStringIO.StringIO(bytes))
+ tinfo = TarInfo(zinfo.filename)
+ tinfo.size = len(bytes)
+ tinfo.mtime = calendar.timegm(zinfo.date_time)
+ self.tarfile.addfile(tinfo, StringIO(bytes))
def close(self):
self.tarfile.close()
#class TarFileCompat
-if __name__ == "__main__":
- # a "light-weight" implementation of GNUtar ;-)
- usage = """
-Usage: %s [options] [files]
-
--h display this help message
--c create a tarfile
--r append to an existing archive
--x extract archive
--t list archive contents
--f FILENAME
- use archive FILENAME, else STDOUT (-c)
--z filter archive through gzip
--C DIRNAME
- with opt -x: extract to directory DIRNAME
- with opt -c, -r: put files to archive under DIRNAME
--v verbose output
--q quiet
-
-wildcards *, ?, [seq], [!seq] are accepted.
- """ % sys.argv[0]
-
- import getopt, glob
+#--------------------
+# exported functions
+#--------------------
+def is_tarfile(name):
+ """Return True if name points to a tar archive that we
+ are able to handle, else return False.
+ """
try:
- opts, args = getopt.getopt(sys.argv[1:], "htcrzxf:C:qv")
- except getopt.GetoptError, e:
- print
- print "ERROR:", e
- print usage
- sys.exit(0)
-
- file = None
- mode = None
- dir = None
- comp = 0
- debug = 0
- for o, a in opts:
- if o == "-t": mode = "l" # list archive
- if o == "-c": mode = "w" # write to archive
- if o == "-r": mode = "a" # append to archive
- if o == "-x": mode = "r" # extract from archive
- if o == "-f": file = a # specify filename else use stdout
- if o == "-C": dir = a # change to dir
- if o == "-z": comp = 1 # filter through gzip
- if o == "-v": debug = 2 # verbose mode
- if o == "-q": debug = 0 # quiet mode
- if o == "-h": # help message
- print usage
- sys.exit(0)
-
- if not mode:
- print usage
- sys.exit(0)
-
- if comp:
- func = gzopen
- else:
- func = open
-
- if not file or file == "-":
- if mode != "w":
- print usage
- sys.exit(0)
- debug = 0
- # If under Win32, set stdout to binary.
- try:
- import msvcrt
- msvcrt.setmode(1, os.O_BINARY) #@UndefinedVariable
- except ImportError:
- pass
- tarfile = func("sys.stdout.tar", mode, 9, sys.stdout)
- else:
- if mode == "l":
- tarfile = func(file, "r")
- else:
- tarfile = func(file, mode)
-
- tarfile.debug = debug
-
- if mode == "r":
- if dir is None:
- dir = ""
- for tarinfo in tarfile:
- tarfile.extract(tarinfo, dir)
- elif mode == "l":
- tarfile.list(debug)
- else:
- for arg in args:
- files = glob.glob(arg)
- for f in files:
- tarfile.add(f, dir)
- tarfile.close()
-
-
-class TarFromIterator(TarFile):
- """Readable tarfile-like object generated from iterator
- """
- # These various status numbers indicate what we are in the process
- # of doing in the tarfile.
- BEGIN = 0 # next step is to read tarinfo, write new header
- MIDDLE_OF_FILE = 1 # in process of writing file data
- END = 2 # end of data
-
- # Buffer is added to in multiples of following
- BUFFER_ADDLEN = 64 * 1024
-
- def __init__(self, pair_iter):
- """Construct a TarFromIterator instance. pair_iter is an
- iterator of (TarInfo, fileobj) objects, which fileobj should
- be a file-like object opened for reading, or None. The
- fileobjs will be closed before the next element in the
- iterator is read.
- """
- self.closed = None
- self.name = None
- self.mode = "rb"
- self.pair_iter = pair_iter
-
- self.init_datastructures()
- self.status = self.BEGIN
- self.cur_tarinfo, self.cur_fileobj = None, None
- self.cur_pos_in_fileobj = 0
- self.buffer = ""
- # holds current position as seen by reading client. This is
- # distinct from self.offset.
- self.tar_iter_offset = 0
-
- def seek(self, offset):
- """Seek to current position. Just read and discard some amount"""
- if offset < self.tar_iter_offset:
- raise TarError("Seeks in TarFromIterator must go forwards,\n"
- "Instead asking for %s from %s" %
- (offset, self.tar_iter_offset))
- while offset - self.tar_iter_offset >= self.BUFFER_ADDLEN:
- buf = self.read(self.BUFFER_ADDLEN)
- if not buf: return # eof
- self.read(offset - self.tar_iter_offset)
-
- def read(self, length = -1):
- """Return next length bytes, or everything if length < 0"""
- if length < 0:
- while 1:
- if not self._addtobuffer(): break
- result = self.buffer
- self.buffer = ""
- else:
- while len(self.buffer) < length:
- if not self._addtobuffer(): break
- # It's possible that length > len(self.buffer)
- result = self.buffer[:length]
- self.buffer = self.buffer[length:]
- self.tar_iter_offset += len(result)
- return result
-
- def _addtobuffer(self):
- """Write more data into the buffer. Return None if at end"""
- if self.status == self.BEGIN:
- # Just write headers into buffer
- try: self.cur_tarinfo, self.cur_fileobj = self.pair_iter.next()
- except StopIteration:
- self._add_final()
- self.status = self.END
- return None
-
- # Zero out tarinfo sizes for various file types
- if self.cur_tarinfo.type in (LNKTYPE, SYMTYPE,
- FIFOTYPE, CHRTYPE, BLKTYPE):
- self.cur_tarinfo.size = 0l
-
- full_headers = self._get_full_headers(self.cur_tarinfo)
- self.buffer += full_headers
- self.offset += len(full_headers)
- assert len(full_headers) % BLOCKSIZE == 0
-
- if self.cur_fileobj is None: # no data with header
- self.status = self.BEGIN
- self._finish_fileobj()
- else:
- self.status = self.MIDDLE_OF_FILE
- self.cur_pos_in_fileobj = 0
- return 1
- elif self.status == self.MIDDLE_OF_FILE:
- # Add next chunk of self.cur_fileobj to self.buffer
- l = min(self.BUFFER_ADDLEN,
- self.cur_tarinfo.size - self.cur_pos_in_fileobj)
- s = self.cur_fileobj.read(l)
- self.cur_pos_in_fileobj += len(s)
- if len(s) == 0:
- if l != 0: raise IOError, "end of file reached"
- blocks, remainder = divmod(self.cur_tarinfo.size, BLOCKSIZE)
- if remainder > 0:
- self.buffer += "\0" * (BLOCKSIZE - remainder)
- blocks += 1
- self.cur_fileobj.close()
- self.offset += blocks * BLOCKSIZE
- self._finish_fileobj()
- self.status = self.BEGIN
- else: self.buffer += s
- return 1
- elif self.status == self.END: return None
- assert 0
-
- def _finish_fileobj(self):
- """Update some variables when done writing fileobj"""
- return # Skip saving tarinfo information to save memory
- self.members.append(self.cur_tarinfo)
- self.membernames.append(self.cur_tarinfo.name)
- self.chunks.append(self.offset)
-
- def _add_final(self):
- """Add closing footer to buffer"""
- blocks, remainder = divmod(self.offset, RECORDSIZE) #@UnusedVariable
- if remainder > 0: self.buffer += "\0" * (RECORDSIZE - remainder)
-
- def close(self):
- """Close file obj"""
- assert not self.closed
- self.closed = 1
-
-
-def uid2uname(uid):
- """Return uname of uid, or raise KeyError if none"""
- if uid_dict is None: set_pwd_dict()
- return uid_dict[uid]
-
-def uname2uid(uname):
- """Return uid of given uname, or raise KeyError if none"""
- if uname_dict is None: set_pwd_dict()
- return uname_dict[uname]
-
-def set_pwd_dict():
- """Set global pwd caching dictionaries uid_dict and uname_dict"""
- global uid_dict, uname_dict
- assert uid_dict is None and uname_dict is None and pwd
- uid_dict = {}; uname_dict = {}
- for entry in pwd.getpwall():
- uname = entry[0]; uid = entry[2]
- uid_dict[uid] = uname
- uname_dict[uname] = uid
-
-def gid2gname(gid):
- """Return group name of gid, or raise KeyError if none"""
- if gid_dict is None: set_grp_dict()
- return gid_dict[gid]
-
-def gname2gid(gname):
- """Return gid of given group name, or raise KeyError if none"""
- if gname_dict is None: set_grp_dict()
- return gname_dict[gname]
-
-def set_grp_dict():
- global gid_dict, gname_dict
- assert gid_dict is None and gname_dict is None and grp
- gid_dict = {}; gname_dict = {}
- for entry in grp.getgrall():
- gname = entry[0]; gid = entry[2]
- gid_dict[gid] = gname
- gname_dict[gname] = gid
+ t = open(name)
+ t.close()
+ return True
+ except TarError:
+ return False
+
+bltn_open = open
+open = TarFile.open
=== modified file 'duplicity/util.py'
--- duplicity/util.py 2011-08-18 19:17:55 +0000
+++ duplicity/util.py 2011-08-23 18:28:26 +0000
@@ -28,6 +28,8 @@
import string
import traceback
+from duplicity import tarfile
+
import duplicity.globals as globals
import duplicity.log as log
@@ -69,6 +71,31 @@
else:
raise
+class FakeTarFile:
+ debug = 0
+ def __iter__(self):
+ return iter([])
+ def close(self):
+ pass
+
+def make_tarfile(mode, fp):
+ # We often use 'empty' tarfiles for signatures that haven't been filled out
+ # yet. So we want to ignore ReadError exceptions, which are used to signal
+ # this.
+ try:
+ return tarfile.TarFile("arbitrary", mode, fp)
+ except tarfile.ReadError:
+ return FakeTarFile()
+
+def get_tarinfo_name(ti):
+ # Python versions before 2.6 ensure that directories end with /, but 2.6
+ # and later ensure that they *don't* have /. ::shrug:: Internally, we
+ # continue to use pre-2.6 method.
+ if ti.isdir() and not ti.name.endswith("/"):
+ return ti.name + "/"
+ else:
+ return ti.name
+
def ignore_missing(fn, filename):
"""
Execute fn on filename. Ignore ENOENT errors, otherwise raise exception.
=== modified file 'rdiffdir'
--- rdiffdir 2010-11-20 15:39:00 +0000
+++ rdiffdir 2011-08-23 18:28:26 +0000
@@ -33,7 +33,6 @@
from duplicity import diffdir
from duplicity import patchdir
from duplicity import log
-from duplicity import tarfile
from duplicity import globals
from duplicity import selection
from duplicity import path
=== added file 'tarfile-CHANGES'
--- tarfile-CHANGES 1970-01-01 00:00:00 +0000
+++ tarfile-CHANGES 2011-08-23 18:28:26 +0000
@@ -0,0 +1,3 @@
+tarfile.py is a copy of python2.7's tarfile.py.
+
+No changes besides 2.4 compatibility have been made.
=== modified file 'tarfile-LICENSE'
--- tarfile-LICENSE 2002-10-29 01:49:46 +0000
+++ tarfile-LICENSE 2011-08-23 18:28:26 +0000
@@ -89,4 +89,4 @@
README Version
--------------
-$Id: tarfile-LICENSE,v 1.1 2002/10/29 01:49:46 bescoto Exp $
\ No newline at end of file
+$Id: tarfile-LICENSE,v 1.1 2002/10/29 01:49:46 bescoto Exp $
=== modified file 'testing/diffdirtest.py'
--- testing/diffdirtest.py 2010-11-20 15:32:59 +0000
+++ testing/diffdirtest.py 2011-08-23 18:28:26 +0000
@@ -26,6 +26,7 @@
from duplicity.path import * #@UnusedWildImport
from duplicity import diffdir
from duplicity import selection
+from duplicity import util
from duplicity import tarfile #@Reimport
config.setup()
@@ -133,8 +134,9 @@
"snapshot/file_to_directory/"]
for tarinfo in tarfile.TarFile("testfiles/output/dir1dir2.difftar",
"r"):
- if tarinfo.name in changed_files:
- changed_files.remove(tarinfo.name)
+ tiname = util.get_tarinfo_name(tarinfo)
+ if tiname in changed_files:
+ changed_files.remove(tiname)
assert not changed_files, ("Following files not found:\n"
+ "\n".join(changed_files))
=== modified file 'testing/patchdirtest.py'
--- testing/patchdirtest.py 2011-06-17 18:22:28 +0000
+++ testing/patchdirtest.py 2011-08-23 18:28:26 +0000
@@ -145,25 +145,18 @@
def make_bad_tar(filename):
"""Write attack tarfile to filename"""
- def iterate_one_pair(path):
- """Iterate one (tarinfo, fp) pair
-
- file object will be empty, and tarinfo will have path
- "snapshot/../warning-security-error"
-
- """
- path.index = ("diff", "..", "warning-security-error")
- ti = path.get_tarinfo()
- fp = cStringIO.StringIO("")
- yield (ti, fp)
+ tf = tarfile.TarFile(name=filename, mode="w")
+
+ # file object will be empty, and tarinfo will have path
+ # "snapshot/../warning-security-error"
assert not os.system("cat /dev/null >testfiles/output/file")
- tf = tarfile.TarFromIterator(iterate_one_pair(
- Path("testfiles/output/file")))
- tfbuf = tf.read()
+ path = Path("testfiles/output/file")
+ path.index = ("diff", "..", "warning-security-error")
+ ti = path.get_tarinfo()
+ fp = cStringIO.StringIO("")
+ tf.addfile(ti, fp)
- fout = open(filename, "wb")
- fout.write(tfbuf)
- assert not fout.close()
+ tf.close()
self.deltmp()
make_bad_tar("testfiles/output/bad.tar")
=== modified file 'testing/test_tarfile.py'
--- testing/test_tarfile.py 2010-11-20 15:32:59 +0000
+++ testing/test_tarfile.py 2011-08-23 18:28:26 +0000
@@ -25,7 +25,7 @@
# $Id: test_tarfile.py,v 1.11 2009/04/02 14:47:12 loafman Exp $
import config
-import sys, os, shutil, StringIO, tempfile, unittest, stat, pwd, grp
+import sys, os, shutil, StringIO, tempfile, unittest, stat
from duplicity import tarfile
@@ -187,52 +187,11 @@
tf.add(filename, filename, 0)
tf.close()
- def make_temptar_iterator(self):
- """Tar up tempdir using an iterator"""
- try:
- os.lstat("temp2.tar")
- except OSError:
- pass
- else:
- assert not os.system("rm temp2.tar")
-
- self.make_tempdir()
- def generate_pairs(tfi_list):
- for filename in self.files_in_tempdir:
- ti = tarfile.TarInfo()
- ti.set_arcname(filename)
- ti.init_from_stat(os.lstat(filename))
- if filename == "tempdir/hardlinked2":
- ti.type = tarfile.LNKTYPE
- ti.linkname = "tempdir/hardlinked1"
- yield (ti, None)
- elif filename == "tempdir" or filename == "tempdir/fifo":
- yield (ti, None)
- elif filename == "tempdir/symlink":
- ti.linkname = os.readlink(filename)
- yield (ti, None)
- else:
- yield (ti, open(filename, "rb"))
- tfi_list = [None]
- tfi = tarfile.TarFromIterator(generate_pairs(tfi_list))
- tfi_list[0] = tfi # now generate_pairs can find tfi
-
- buf = tfi.read()
- tfi.close()
- fout = open("temp2.tar", "wb")
- fout.write(buf)
- fout.close()
-
def test_tarfile_creation(self):
"""Create directory, make tarfile, extract using gnutar, compare"""
self.make_temptar()
self.extract_and_compare_tarfile()
- def test_tarfile_creation_from_iterator(self):
- """Same as test_tarfile_creation, but use iterator interface"""
- self.make_temptar_iterator()
- self.extract_and_compare_tarfile()
-
def extract_and_compare_tarfile(self):
os.system("rm -r tempdir")
assert not os.system("tar -xf temp2.tar")
@@ -354,51 +313,13 @@
def seek(self, position):
#print "Seeking to ", position
return self.infp.seek(position)
+ def tell(self):
+ #print "Telling"
+ return self.infp.tell()
def close(self):
#print "Closing"
return self.infp.close()
-class PasswordTest(unittest.TestCase):
- """Test retrieving, storing password information"""
- def compare(self, thunk1, thunk2):
- """Make sure thunk1 and thunk2 return the same"""
- try: result1 = thunk1()
- except KeyError, exc1: keyerror = 1 #@UnusedVariable
- else: keyerror = 0
-
- try: result2 = thunk2()
- except KeyError, exc2: #@UnusedVariable
- assert keyerror, "Got KeyError vs " + str(result2)
- return
- else: assert not keyerror, "Got %s vs KeyError" % (str(result1),)
-
- assert result1 == result2, (result1, result2)
-
- def test_uid2uname(self):
- """Test getting unames by uid"""
- for uid in (0, 500, 789, 0, 0, 500):
- self.compare(lambda: tarfile.uid2uname(uid),
- lambda: pwd.getpwuid(uid)[0])
-
- def test_gid2gname(self):
- """Test getting group names by gid"""
- for gid in (0, 500, 789, 0, 0, 500):
- self.compare(lambda: tarfile.gid2gname(gid),
- lambda: grp.getgrgid(gid)[0])
-
- def test_gname2gid(self):
- """Test getting gids from gnames"""
- for gname in ('root', 'ben', 'bin', 'sanothua', 'root', 'root'):
- self.compare(lambda: tarfile.gname2gid(gname),
- lambda: grp.getgrnam(gname)[2])
-
- def test_uname2uid(self):
- """Test getting uids from unames"""
- for uname in ('root', 'ben', 'bin', 'sanothua', 'root', 'root'):
- self.compare(lambda: tarfile.uname2uid(uname),
- lambda: pwd.getpwnam(uname)[2])
-
-
if __name__ == "__main__":
unittest.main()
Follow ups