gh-136170: Revert adding `ZipFile.data_offset` (GH-136950)
* Revert "gh-84481: Make ZipFile.data_offset more robust (#132178)"
This reverts commit 6cd1d6c6b142697fb72f422b7b448c27ebc30534.
* Revert "gh-84481: Add ZipFile.data_offset attribute (#132165)"
This reverts commit 0788948dcb980c7648b29ca363390b696d7f188f.
---------
Co-authored-by: Gregory P. Smith <greg@krypto.org>
diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
index bf9136a..a1261ec 100644
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -558,14 +558,6 @@
it should be no longer than 65535 bytes. Comments longer than this will be
truncated.
-.. attribute:: ZipFile.data_offset
-
- The offset to the start of ZIP data from the beginning of the file. When the
- :class:`ZipFile` is opened in either mode ``'w'`` or ``'x'`` and the
- underlying file does not support ``tell()``, the value will be ``None``
- instead.
-
- .. versionadded:: 3.14
.. _path-objects:
diff --git a/Lib/test/test_zipfile/test_core.py b/Lib/test/test_zipfile/test_core.py
index ada9681..c033059 100644
--- a/Lib/test/test_zipfile/test_core.py
+++ b/Lib/test/test_zipfile/test_core.py
@@ -3470,60 +3470,6 @@ def test_execute_zip64(self):
self.assertIn(b'number in executable: 5', output)
-class TestDataOffsetPrependedZip(unittest.TestCase):
- """Test .data_offset on reading zip files with an executable prepended."""
-
- def setUp(self):
- self.exe_zip = findfile('exe_with_zip', subdir='archivetestdata')
- self.exe_zip64 = findfile('exe_with_z64', subdir='archivetestdata')
-
- def _test_data_offset(self, name):
- with zipfile.ZipFile(name) as zipfp:
- self.assertEqual(zipfp.data_offset, 713)
-
- def test_data_offset_with_exe_prepended(self):
- self._test_data_offset(self.exe_zip)
-
- def test_data_offset_with_exe_prepended_zip64(self):
- self._test_data_offset(self.exe_zip64)
-
-class TestDataOffsetZipWrite(unittest.TestCase):
- """Test .data_offset for ZipFile opened in write mode."""
-
- def setUp(self):
- os.mkdir(TESTFNDIR)
- self.addCleanup(rmtree, TESTFNDIR)
- self.test_path = os.path.join(TESTFNDIR, 'testoffset.zip')
-
- def test_data_offset_write_no_prefix(self):
- with io.BytesIO() as fp:
- with zipfile.ZipFile(fp, "w") as zipfp:
- self.assertEqual(zipfp.data_offset, 0)
-
- def test_data_offset_write_with_prefix(self):
- with io.BytesIO() as fp:
- fp.write(b"this is a prefix")
- with zipfile.ZipFile(fp, "w") as zipfp:
- self.assertEqual(zipfp.data_offset, 16)
-
- def test_data_offset_append_with_bad_zip(self):
- with io.BytesIO() as fp:
- fp.write(b"this is a prefix")
- with zipfile.ZipFile(fp, "a") as zipfp:
- self.assertEqual(zipfp.data_offset, 16)
-
- def test_data_offset_write_no_tell(self):
- # The initializer in ZipFile checks if tell raises AttributeError or
- # OSError when creating a file in write mode when deducing the offset
- # of the beginning of zip data
- class NoTellBytesIO(io.BytesIO):
- def tell(self):
- raise OSError("Unimplemented!")
- with NoTellBytesIO() as fp:
- with zipfile.ZipFile(fp, "w") as zipfp:
- self.assertIsNone(zipfp.data_offset)
-
-
class EncodedMetadataTests(unittest.TestCase):
file_names = ['\u4e00', '\u4e8c', '\u4e09'] # Han 'one', 'two', 'three'
file_content = [
diff --git a/Lib/zipfile/__init__.py b/Lib/zipfile/__init__.py
index 18caeb3..2969f73 100644
--- a/Lib/zipfile/__init__.py
+++ b/Lib/zipfile/__init__.py
@@ -1452,7 +1452,6 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
self._lock = threading.RLock()
self._seekable = True
self._writing = False
- self._data_offset = None
try:
if mode == 'r':
@@ -1463,7 +1462,6 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
self._didModify = True
try:
self.start_dir = self.fp.tell()
- self._data_offset = self.start_dir
except (AttributeError, OSError):
self.fp = _Tellable(self.fp)
self.start_dir = 0
@@ -1488,7 +1486,6 @@ def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=True,
# even if no files are added to the archive
self._didModify = True
self.start_dir = self.fp.tell()
- self._data_offset = self.start_dir
else:
raise ValueError("Mode must be 'r', 'w', 'x', or 'a'")
except:
@@ -1535,10 +1532,6 @@ def _RealGetContents(self):
# self.start_dir: Position of start of central directory
self.start_dir = offset_cd + concat
- # store the offset to the beginning of data for the
- # .data_offset property
- self._data_offset = concat
-
if self.start_dir < 0:
raise BadZipFile("Bad offset for central directory")
fp.seek(self.start_dir, 0)
@@ -1599,12 +1592,6 @@ def _RealGetContents(self):
zinfo._end_offset = end_offset
end_offset = zinfo.header_offset
- @property
- def data_offset(self):
- """The offset to the start of zip data in the file or None if
- unavailable."""
- return self._data_offset
-
def namelist(self):
"""Return a list of file names in the archive."""
return [data.filename for data in self.filelist]
diff --git a/Misc/NEWS.d/next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst b/Misc/NEWS.d/next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst
new file mode 100644
index 0000000..fd30fe1
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2025-07-21-22-35-50.gh-issue-136170.QUlc78.rst
@@ -0,0 +1,3 @@
+Removed the unreleased ``zipfile.ZipFile.data_offset`` property added in 3.14.0a7,
+as it was not fully clear which behavior it should have in some situations, so
+the result was not always what a user might expect.