Skip to content

Commit 190b3ef

Browse files
fix(150075): adding tarinfo offset info before and after reading the file
Signed-off-by: Grant Herman <grantlouisherman041@gmail.com>
1 parent 1f3c267 commit 190b3ef

2 files changed

Lines changed: 27 additions & 1 deletion

File tree

Lib/tarfile.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2360,11 +2360,14 @@ def addfile(self, tarinfo, fileobj=None):
23602360
raise ValueError("fileobj not provided for non zero-size regular file")
23612361

23622362
tarinfo = copy.copy(tarinfo)
2363-
2363+
# Record the archive offset at which this member's header begins.
2364+
tarinfo.offset = self.offset
23642365
buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
23652366
self.fileobj.write(buf)
23662367
self.offset += len(buf)
2368+
# The header has been written, so self.offset now marks where the member's data begins.
23672369
bufsize=self.copybufsize
2370+
tarinfo.offset_data = self.offset
23682371
# If there's data to follow, append it.
23692372
if fileobj is not None:
23702373
copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)

Lib/test/test_tarfile.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1456,6 +1456,29 @@ class WriteTest(WriteTestBase, unittest.TestCase):
14561456

14571457
prefix = "w:"
14581458

1459+
def test_addfile_sets_offsets(self):
1460+
# gh-150075: addfile() must set offset and offset_data on the
1461+
# TarInfo stored in the archive so they match a subsequent read.
1462+
data = b"data"
1463+
1464+
with tarfile.open(tmpname, self.mode) as tar:
1465+
t1 = tarfile.TarInfo("test1.txt")
1466+
t1.size = len(data)
1467+
tar.addfile(t1, io.BytesIO(data))
1468+
1469+
t2 = tarfile.TarInfo("test2.txt")
1470+
t2.size = len(data)
1471+
tar.addfile(t2, io.BytesIO(data))
1472+
1473+
write_members = tar.getmembers()
1474+
1475+
with tarfile.open(tmpname) as tar:
1476+
read_members = tar.getmembers()
1477+
1478+
for w, r in zip(write_members, read_members):
1479+
self.assertEqual(w.offset, r.offset)
1480+
self.assertEqual(w.offset_data, r.offset_data)
1481+
14591482
def test_100_char_name(self):
14601483
# The name field in a tar header stores strings of at most 100 chars.
14611484
# If a string is shorter than 100 chars it has to be padded with '\0',

0 commit comments

Comments
 (0)