Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
58 commits
Select commit Hold shift + click to select a range
0f0a2df
Limited write support: initial support
sjperkins May 21, 2025
d582291
Remove debugging assign
sjperkins May 21, 2025
99d19bf
Remove unused method
sjperkins May 21, 2025
7253be7
Remove unused assign of common_store_args on the DataTree root
sjperkins May 21, 2025
cdd9573
Remove unused tmp_path fixture
sjperkins May 21, 2025
7d3e4f9
Use ternary operator
sjperkins May 21, 2025
389c451
Merge branch 'main' into write-support
sjperkins Jun 30, 2025
4cffec8
Merge branch 'main' into write-support
sjperkins Jul 16, 2025
658dfb6
Prototype support for adding new columns
sjperkins Jul 16, 2025
ce3716a
Use existing table descriptor during synthesis of column descriptors
sjperkins Jul 16, 2025
ffdc94d
Preliminary support for writing with regions
sjperkins Jul 17, 2025
d68ece2
Comments and typing
sjperkins Jul 18, 2025
30c54fa
Merge branch 'main' into write-support
sjperkins Jul 18, 2025
6fa1347
Merge branch 'main' into write-support
sjperkins Jul 18, 2025
85623e9
Merge branch 'main' into write-support
sjperkins Jul 18, 2025
4d1e0bb
Depend on arcae 0.4.0-alpha.1
sjperkins Aug 12, 2025
7667206
Distinguish data written by two test cases
sjperkins Aug 13, 2025
ce26c37
Temporarily get test cases working by using a function session scope …
sjperkins Aug 13, 2025
c567a5c
Upgrade to arcae 0.4.0.alpha.2
sjperkins Aug 25, 2025
f5f9530
Change simmed_ms test fixture back to scope
sjperkins Aug 25, 2025
df6db52
Create specific test data for individual write tests
sjperkins Aug 25, 2025
82b9402
Add a dask process/threads test case
sjperkins Aug 25, 2025
919946a
Merge branch 'main' into write-support
sjperkins Aug 26, 2025
9a4e4fa
Use max(shape[d], s.stop) to clamp expected_shape during writes
sjperkins Aug 26, 2025
84b1afe
Merge branch 'main' into write-support
sjperkins Aug 26, 2025
a92b605
Merge branch 'main' into write-support
sjperkins Aug 27, 2025
0af4a0f
Remove deprecated lockoptions kwarg
sjperkins Aug 27, 2025
451f830
Merge branch 'main' into write-support
sjperkins Sep 2, 2025
0a8354d
Disable threads case
sjperkins Sep 2, 2025
e10f693
Create a new MS for test_indexed_write
sjperkins Sep 2, 2025
3cce2d2
Introduce a gc.collect into clear_caches
sjperkins Sep 2, 2025
d3ab196
Merge branch 'main' into write-support
sjperkins Sep 3, 2025
91cb53b
Merge branch 'main' into write-support
sjperkins Sep 18, 2025
71836e5
Depend on arcae 0.4.0a3
sjperkins Sep 19, 2025
246ab67
Re-introduce comparison
sjperkins Sep 19, 2025
57b138b
WIP - introducing sync, maybe soft revert this commit
sjperkins Sep 22, 2025
2a26baa
Merge branch 'main' into write-support
sjperkins Feb 19, 2026
74251f2
Fix use of check_var and comment out code to get test csases passing
sjperkins Feb 19, 2026
0eb4edd
Use DataTree.sync_msv2() to add new columns
sjperkins Feb 19, 2026
97740a6
Guard xarray imports on version
sjperkins Feb 19, 2026
73eba5f
Merge branch 'main' into write-support
sjperkins Feb 27, 2026
bb48a91
Add MSv2Store.table_factory method
sjperkins Feb 27, 2026
432628f
Avoid delegating to a dask delayed object in dataset_to_msv2
sjperkins Feb 27, 2026
7eb1bdd
Move MAIN_PREFIX_DIMS into msv4_types.py
sjperkins Feb 27, 2026
4fb6b65
Patch write support into xarray Dataset and DataTrees on package import
sjperkins Feb 27, 2026
a16b00d
Merge branch 'main' into write-support
sjperkins Mar 2, 2026
60f0753
Upgrade to python 3.11
sjperkins Mar 3, 2026
e6b874f
Pin arcae to a 0.4.0a4 write alpha
sjperkins Mar 3, 2026
30c7c8c
Update tbump version regex to support alpha, betas and release candid…
sjperkins Mar 3, 2026
7b311e3
Increase precision of tbump version search regexes
sjperkins Mar 3, 2026
0c4bd58
Remove python 3.10 from the CI testing matrix
sjperkins Mar 3, 2026
58b525d
Bump to 0.4.0-alpha.1
sjperkins Mar 3, 2026
826e64c
Merge branch 'main' into write-support
sjperkins Mar 6, 2026
e2f6a07
Bump to 0.4.0-alpha.2
sjperkins Mar 6, 2026
886f8af
Align {sync,to}_msv2 with to_zarr. i.e. the user should drop variable…
sjperkins Mar 9, 2026
9575b0d
Bump to 0.4.0-alpha.3
sjperkins Mar 9, 2026
2038995
Merge branch 'main' into write-support
sjperkins Mar 23, 2026
3718b4c
Bump to 0.4.0-alpha.4
sjperkins Mar 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
project = "xarray-ms"
copyright = "2024 - 2025 NRF (SARAO) and Rhodes University (RATT) Centre"
author = "Simon Perkins"
release = "0.5.1"
release = "0.4.0-alpha.4"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
Expand Down
16 changes: 9 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
[project]
name = "xarray-ms"
version = "0.5.1"
version = "0.4.0-alpha.4"
description = "xarray MSv4 views over MSv2 Measurement Sets"
authors = [{name = "Simon Perkins", email = "simon.perkins@gmail.com"}]
readme = "README.rst"
requires-python = ">=3.11"
dependencies = [
"xarray>=2025.0",
"cacheout>=0.16.0",
"arcae>=0.5.1, < 0.6.0",
"arcae>=0.4.0a4, < 0.5.0",
"typing-extensions>=4.12.2",
]

Expand Down Expand Up @@ -55,18 +55,18 @@ extend-select = ["I"]
# github_url = "https://github.com/<user or organization>/<project>/"

[tool.tbump.version]
current = "0.5.1"
current = "0.4.0-alpha.4"

# Example of a semver regexp.
# Make sure this matches current_version before
# using tbump
# https://semver.org/#spec-item-9
regex = '''
(?P<major>\d+)
\.
(?P<minor>\d+)
\.
(?P<patch>\d+)
'''
-?
(?P<ptype>(alpha|beta|rc)\.(?P<prerelease>\d+))?
'''

[tool.tbump.git]
message_template = "Bump to {new_version}"
Expand All @@ -77,9 +77,11 @@ tag_template = "{new_version}"
# tbump.toml location.
[[tool.tbump.file]]
src = "pyproject.toml"
search = "^(current|version)\\s*=\\s*\"{current_version}\""

[[tool.tbump.file]]
src = "doc/source/conf.py"
search = "^release\\s*=\\s*\"{current_version}\""

# You can specify a list of commands to
# run after the files have been patched
Expand Down
12 changes: 9 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import gc

import numpy as np
import pytest
from arcae.lib.arrow_tables import Table, ms_descriptor
Expand Down Expand Up @@ -38,14 +40,18 @@ def pytest_collection_modifyitems(config, items):
@pytest.fixture(autouse=True)
def clear_caches():
yield
Multiton._INSTANCE_CACHE.clear()

# Structure Factories have references to Multitons
MSv2StructureFactory._STRUCTURE_CACHE.clear()
Multiton._INSTANCE_CACHE.clear()
gc.collect()


@pytest.fixture(scope="session", params=[DEFAULT_SIM_PARAMS])
def simmed_ms(request, tmp_path_factory):
ms = tmp_path_factory.mktemp("simulated") / request.param.pop("name", "test.ms")
simulator = MSStructureSimulator(**{**DEFAULT_SIM_PARAMS, **request.param})
params = request.param.copy()
ms = tmp_path_factory.mktemp("simulated") / params.pop("name", "test.ms")
simulator = MSStructureSimulator(**{**DEFAULT_SIM_PARAMS, **params})
simulator.simulate_ms(str(ms))
return str(ms)

Expand Down
154 changes: 154 additions & 0 deletions tests/test_write.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
from contextlib import ExitStack

import arcae
import numpy as np
import pytest
import xarray
from xarray import DataTree

import xarray_ms
from xarray_ms.errors import MismatchedWriteRegion
from xarray_ms.msv4_types import CORRELATED_DATASET_TYPES


def test_write_support():
assert xarray_ms.multithreaded_writes()


@pytest.mark.parametrize("simmed_ms", [{"name": "test_store.ms"}], indirect=True)
def test_store(simmed_ms):
read = written = False

with xarray.open_datatree(simmed_ms, auto_corrs=True) as xdt:
# Overwrite UVW coordinates with zeroes
# Add a CORRECTED column
for node in xdt.subtree:
if node.attrs.get("type") in CORRELATED_DATASET_TYPES:
assert not np.all(node.UVW == 0)
node.UVW[:] = 0
assert len(node.encoding) > 0
ds = node.ds.assign(CORRECTED=xarray.full_like(node.VISIBILITY, 2 + 3j))
xdt[node.path] = DataTree(ds)
assert len(node.encoding) > 0

xdt.sync_msv2()
xdt.to_msv2()
written = written or True

with xarray.open_datatree(simmed_ms, auto_corrs=True) as xdt:
for node in xdt.subtree:
if node.attrs.get("type") in CORRELATED_DATASET_TYPES:
np.testing.assert_array_equal(node.UVW, 0)
np.testing.assert_array_equal(node.CORRECTED, 2 + 3j)
read = read or True

assert read
assert written

# We can check that CORRECTED has been written correctly
with arcae.table(simmed_ms) as T:
np.testing.assert_array_equal(T.getcol("UVW"), 0)
np.testing.assert_array_equal(T.getcol("CORRECTED"), 2 + 3j)


@pytest.mark.parametrize("simmed_ms", [{"name": "test_store_region.ms"}], indirect=True)
def test_store_region(simmed_ms):
region = {"time": slice(0, 2), "frequency": slice(2, 4)}

with xarray.open_datatree(simmed_ms, auto_corrs=True) as xdt:
# Add a CORRECTED column
for node in xdt.subtree:
if node.attrs.get("type") in CORRELATED_DATASET_TYPES:
ds = node.ds.assign(CORRECTED=xarray.zeros_like(node.VISIBILITY))
xdt[node.path] = DataTree(ds)
assert len(node.encoding) > 0

# Create the new MS columns
xdt.sync_msv2()

for node in xdt.subtree:
if node.attrs.get("type") in CORRELATED_DATASET_TYPES:
sizes = node.sizes
ds = ds.isel(**region)
ds = ds.assign(CORRECTED=xarray.full_like(ds.CORRECTED, 1 + 2j))
# Now write it out
ds.to_msv2(compute=False, region=region)

# We can check that CORRECTED has been written correctly
with arcae.table(simmed_ms) as T:
corrected = T.getcol("CORRECTED")
nt, nbl, nf, npol = (
sizes[d] for d in ("time", "baseline_id", "frequency", "polarization")
)
corrected = corrected.reshape((nt, nbl, nf, npol))
ts, fs = (region[d] for d in ("time", "frequency"))
mask = np.full(corrected.shape, False, np.bool_)
mask[ts, :, fs, :] = True
np.testing.assert_array_equal(corrected[mask], 1 + 2j)
np.testing.assert_array_equal(corrected[~mask], 0 + 0j)


@pytest.mark.parametrize("chunks", [{"time": 2, "frequency": 2}])
@pytest.mark.parametrize("simmed_ms", [{"name": "distributed-write.ms"}], indirect=True)
@pytest.mark.parametrize("nworkers", [4])
@pytest.mark.parametrize("processes", [True, False])
def test_distributed_write(simmed_ms, processes, nworkers, chunks):
da = pytest.importorskip("dask.array")
distributed = pytest.importorskip("dask.distributed")

with ExitStack() as stack:
cluster = stack.enter_context(
distributed.LocalCluster(processes=processes, n_workers=nworkers)
)
stack.enter_context(distributed.Client(cluster))
dt = stack.enter_context(
xarray.open_datatree(simmed_ms, chunks=chunks, auto_corrs=True)
)
for node in dt.subtree:
if node.attrs.get("type") in CORRELATED_DATASET_TYPES:
vis = node.VISIBILITY
sizes = node.sizes
corrected = da.arange(np.prod(vis.shape), dtype=np.int32)
corrected = corrected.reshape(vis.shape).rechunk(vis.data.chunks)
ds = node.ds.assign(CORRECTED=(vis.dims, corrected))
dt[node.path] = DataTree(ds)
assert len(node.encoding) > 0

# Create the new MS columns
dt.sync_msv2()
dt.to_msv2(compute=False)

for node in dt.subtree:
if node.attrs.get("type") in CORRELATED_DATASET_TYPES:
node.ds.to_msv2(compute=True)

with arcae.table(simmed_ms) as T:
corrected = T.getcol("CORRECTED")
shape = tuple(
sizes[d] for d in ("time", "baseline_id", "frequency", "polarization")
)
expected = np.arange(np.prod(vis.shape), dtype=np.int32)
expected = expected.reshape((-1,) + shape[2:])
np.testing.assert_array_equal(corrected, expected)


@pytest.mark.parametrize("simmed_ms", [{"name": "indexed-write.ms"}], indirect=True)
def test_indexed_write(simmed_ms):
"""Check that we throw if we select a variable out with an integer index
and then try write that sub-selection out"""
dt = xarray.open_datatree(simmed_ms)
assert len(dt.children) == 1

for node in dt.subtree:
if node.attrs.get("type") in CORRELATED_DATASET_TYPES:
ds = node.ds.assign(CORRECTED=xarray.full_like(node.VISIBILITY, 1 + 2j))
dt[node.path] = DataTree(ds)

dt.sync_msv2()
dt.to_msv2(compute=False)

for node in dt.subtree:
if node.attrs.get("type") in CORRELATED_DATASET_TYPES:
ds = node.ds.isel(time=slice(0, 2), baseline_id=slice(0, 2), frequency=1)
with pytest.raises(MismatchedWriteRegion):
ds.to_msv2(compute=True)
36 changes: 36 additions & 0 deletions xarray_ms/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import warnings

HAS_WRITE_SUPPORT = False


def _install_write_support():
"""Patch write support methods onto xarray.Dataset and xarray.DataTree"""
global HAS_WRITE_SUPPORT

if HAS_WRITE_SUPPORT:
return

try:
from xarray import Dataset, DataTree

from xarray_ms.backend.msv2.writes import (
dataset_to_msv2,
datatree_to_msv2,
sync_msv2,
)
except ImportError as e:
warnings.warn(f"Engaging write support failed due to {e}", UserWarning)
HAS_WRITE_SUPPORT = False
else:
Dataset.to_msv2 = dataset_to_msv2
DataTree.to_msv2 = datatree_to_msv2
DataTree.sync_msv2 = sync_msv2
HAS_WRITE_SUPPORT = True


def multithreaded_writes() -> bool:
"""Return True if multithreaded write support is enabled"""
return HAS_WRITE_SUPPORT


_install_write_support()
Loading
Loading