SciTools · ukmo-ccbunney · May 15, 2026 · May 14, 2026 · May 14, 2026
diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst
@@ -48,7 +48,9 @@ This document explains the changes made to Iris for this release
 🚀 Performance Enhancements
 ===========================
 
-#. N/A
+#. `@trexfeathers`_ improved the speed of field iteration when reading PP files.
+   Up to a 3x speed-up has been seen, depending on the circumstances.
+   (:pull:`7089`)
 
 
 🔥 Deprecations

diff --git a/lib/iris/fileformats/pp.py b/lib/iris/fileformats/pp.py
@@ -1870,19 +1870,32 @@ def _field_gen(filename, read_data_bytes, little_ended=False):
         pp_file_read = pp_file.read
 
         field_count = 0
+        # Total bytes for the full header record:
+        #   leading length word + long headers + float headers + trailing length word
+        _HEADER_BYTES = PP_WORD_DEPTH * (1 + NUM_LONG_HEADERS + NUM_FLOAT_HEADERS + 1)
+        _LONGS_OFFSET = PP_WORD_DEPTH  # bytes: skip leading length word
+        _FLOATS_OFFSET = _LONGS_OFFSET + NUM_LONG_HEADERS * PP_WORD_DEPTH
+        dtype_longs = np.dtype("%ci%d" % (dtype_endian_char, PP_WORD_DEPTH))
+        dtype_floats = np.dtype("%cf%d" % (dtype_endian_char, PP_WORD_DEPTH))
         # Keep reading until we reach the end of file
         while True:
-            # Move past the leading header length word
-            pp_file_seek(PP_WORD_DEPTH, os.SEEK_CUR)
-            # Get the LONG header entries
-            dtype = "%ci%d" % (dtype_endian_char, PP_WORD_DEPTH)
-            header_longs = np.fromfile(pp_file, dtype=dtype, count=NUM_LONG_HEADERS)
+            # Read the entire header record in one go
+            header_buf = pp_file_read(_HEADER_BYTES)
             # Nothing returned => EOF
-            if len(header_longs) == 0:
+            if len(header_buf) == 0:
                 break
-            # Get the FLOAT header entries
-            dtype = "%cf%d" % (dtype_endian_char, PP_WORD_DEPTH)
-            header_floats = np.fromfile(pp_file, dtype=dtype, count=NUM_FLOAT_HEADERS)
+            header_longs = np.frombuffer(
+                header_buf,
+                dtype=dtype_longs,
+                count=NUM_LONG_HEADERS,
+                offset=_LONGS_OFFSET,
+            )
+            header_floats = np.frombuffer(
+                header_buf,
+                dtype=dtype_floats,
+                count=NUM_FLOAT_HEADERS,
+                offset=_FLOATS_OFFSET,
+            )
             header = tuple(header_longs) + tuple(header_floats)
 
             # Make a PPField of the appropriate sub-class (depends on header
@@ -1900,9 +1913,6 @@ def _field_gen(filename, read_data_bytes, little_ended=False):
                 )
                 break
 
-            # Skip the trailing 4-byte word containing the header length
-            pp_file_seek(PP_WORD_DEPTH, os.SEEK_CUR)
-
             # Read the word telling me how long the data + extra data is
             # This value is # of bytes
             len_of_data_plus_extra = struct.unpack_from(

diff --git a/lib/iris/tests/unit/fileformats/pp/test__field_gen.py b/lib/iris/tests/unit/fileformats/pp/test__field_gen.py
@@ -6,12 +6,20 @@
 
 import contextlib
 import io
+import struct
 
 import numpy as np
 import pytest
 
 import iris.fileformats.pp as pp
 
+# Byte size of the full header record that _field_gen reads in one go:
+#   leading length word + long headers + float headers + trailing length word
+_HEADER_BYTES = pp.PP_WORD_DEPTH * (1 + pp.NUM_LONG_HEADERS + pp.NUM_FLOAT_HEADERS + 1)
+# A valid data-length word: 4 bytes encoding the value 4 (big-endian uint32),
+# matching lblrec=1 * PP_WORD_DEPTH=4 so LBLREC validation passes.
+_DATA_LEN_WORD = struct.pack(">L", 4)
+
 
 class Test:
     @pytest.fixture
@@ -21,24 +29,31 @@ def _mock_for_field_gen(fields):
             side_effect_fields = list(fields)[:]
 
             def make_pp_field_override(*args):
-                # Iterates over the fields passed to this context manager,
-                # until there are no more, upon which the np.fromfile
-                # returns an empty list and the while loop in load() is
-                # broken.
-                result = side_effect_fields.pop(0)
-                if not side_effect_fields:
-                    np.fromfile.return_value = []
-                return result
-
-            open_func = "builtins.open"
-            mocker.patch("numpy.fromfile", return_value=[0])
-            mocker.patch(open_func)
+                return side_effect_fields.pop(0)
+
+            # Build the sequence of bytes that pp_file.read() will return:
+            #   For each field: a _HEADER_BYTES-sized buffer (all zeros is fine
+            #   for our purposes — make_pp_field is fully mocked), followed by
+            #   a 4-byte data-length word.
+            # After all fields: b"" to signal EOF on the next header read.
+            read_side_effects = []
+            for _ in fields:
+                read_side_effects.append(bytes(_HEADER_BYTES))  # header read
+                read_side_effects.append(_DATA_LEN_WORD)  # data-len word
+            read_side_effects.append(b"")  # EOF
+
+            mock_file = mocker.MagicMock(spec=io.RawIOBase)
+            mock_file.__enter__ = mocker.Mock(return_value=mock_file)
+            mock_file.__exit__ = mocker.Mock(return_value=False)
+            mock_file.read.side_effect = read_side_effects
+
+            mocker.patch("builtins.open", return_value=mock_file)
             mocker.patch("struct.unpack_from", return_value=[4])
             mocker.patch(
                 "iris.fileformats.pp.make_pp_field",
                 side_effect=make_pp_field_override,
             )
-            yield
+            yield mock_file
 
         return _mock_for_field_gen
 
@@ -57,40 +72,34 @@ def test_lblrec_invalid(self, mocker, mock_for_field_gen):
         assert len(warn) == 1
 
     def test_read_headers_call(self, mocker, mock_for_field_gen):
-        # Checks that the two calls to np.fromfile are called in the
-        # expected way.
+        # Checks that the file is read in a single call of _HEADER_BYTES and
+        # that np.frombuffer is used to parse longs and floats from that buffer.
         pp_field = mocker.Mock(lblrec=1, lbext=0, lbuser=[0])
-        with mock_for_field_gen([pp_field]):
-            open_fh = mocker.MagicMock(spec=io.RawIOBase)
-            open.return_value = open_fh
+        mock_frombuffer = mocker.patch("numpy.frombuffer", wraps=np.frombuffer)
+        with mock_for_field_gen([pp_field]) as mock_file:
             next(pp._field_gen("mocked", read_data_bytes=False))
-            with open_fh as open_fh_ctx:
-                calls = [
-                    mocker.call(open_fh_ctx, count=45, dtype=">i4"),
-                    mocker.call(open_fh_ctx, count=19, dtype=">f4"),
-                ]
-            np.fromfile.assert_has_calls(calls)
-        with open_fh as open_fh_ctx:
-            expected_deferred_bytes = (
-                "mocked",
-                open_fh_ctx.tell(),
-                4,
-                np.dtype(">f4"),
-            )
-        assert pp_field.data == expected_deferred_bytes
+        # The first read() call should request exactly _HEADER_BYTES bytes.
+        first_read_call = mock_file.read.call_args_list[0]
+        assert first_read_call == mocker.call(_HEADER_BYTES)
+
+        # frombuffer should have been called twice: once for longs, once for floats.
+        assert mock_frombuffer.call_count == 2
+        calls = mock_frombuffer.call_args_list
+        assert calls[0].kwargs["count"] == pp.NUM_LONG_HEADERS
+        assert calls[0].kwargs["dtype"] == np.dtype(">i4")
+        assert calls[1].kwargs["count"] == pp.NUM_FLOAT_HEADERS
+        assert calls[1].kwargs["dtype"] == np.dtype(">f4")
 
     def test_read_data_call(self, mocker, mock_for_field_gen):
         # Checks that data is read if read_data is True.
         pp_field = mocker.Mock(lblrec=1, lbext=0, lbuser=[0])
-        with mock_for_field_gen([pp_field]):
-            open_fh = mocker.MagicMock(spec=io.RawIOBase)
-            open.return_value = open_fh
+        with mock_for_field_gen([pp_field]) as mock_file:
             next(pp._field_gen("mocked", read_data_bytes=True))
-        with open_fh as open_fh_ctx:
-            expected_loaded_bytes = pp.LoadedArrayBytes(
-                open_fh_ctx.read(), np.dtype(">f4")
-            )
-        assert pp_field.data == expected_loaded_bytes
+        # The third read() call (index 2) should be the data payload read
+        # with data_len = lblrec*PP_WORD_DEPTH - lbext*PP_WORD_DEPTH = 4 bytes.
+        data_read_call = mock_file.read.call_args_list[2]
+        assert data_read_call == mocker.call(4)
+        assert isinstance(pp_field.data, pp.LoadedArrayBytes)
 
     def test_invalid_header_release(self, tmp_path):
         # Check that an unknown LBREL value just results in a warning