Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@

The geopins package provides geospatial support for the [Python pins package](https://github.com/rstudio/pins-python). The package publishes data, models, and other Python objects, making it easy to share them across projects and with your colleagues. With `geopins`, there is support for geospatial datatypes (e.g. `geopandas.GeoDataFrame` and `rastr.Raster`) and filetypes (e.g. `GeoPackage` and `GeoTIFF`), fully compatible with your existing pins boards.

The pins package provides drivers for standard Python tabular datatypes (e.g. `pandas.DataFrame`) and filetypes (e.g. CSV and Parquet). The geopins package brings the same experience to geospatial datatypes (e.g. `geopandas.GeoDataFrame` and `rastr.Raster`) and filetypes, fully compatible with your existing pins boards.

## Installation

```bash
Expand Down
22 changes: 22 additions & 0 deletions src/geopins/drivers/gdf/filetypes/gpkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import tempfile
import warnings
from pathlib import Path
from sqlite3 import connect
from typing import TYPE_CHECKING

import geopandas as gpd
Expand Down Expand Up @@ -112,6 +113,9 @@ def pin_write_gdf_gpkg( # noqa: PLR0913
path = Path(tmpdir_path) / f"{name}.gpkg"
x.to_file(path, driver="GPKG")

# Overwrite the modification time to keep hashing stable and release locks.
_snapshot_last_change(path=path)

with warnings.catch_warnings():
# Upstream issue relating to opening files without context managers
warnings.simplefilter("ignore", category=ResourceWarning)
Expand All @@ -123,3 +127,21 @@ def pin_write_gdf_gpkg( # noqa: PLR0913
description=description,
metadata=metadata,
)


def _snapshot_last_change(path: Path) -> None:
    """Pin ``gpkg_contents.last_change`` to the Unix epoch for every row.

    Overwriting the timestamp with a fixed value keeps GeoPackage hashing
    stable.

    Args:
        path: Location of the GeoPackage (SQLite) file to update in place.
    """
    reset_last_change = """
        UPDATE gpkg_contents
        SET last_change = '1970-01-01T00:00:00Z';
        """

    # The connection is closed by hand instead of using `with connect(...)`:
    # sqlite3's context manager only scopes the transaction and does not close
    # the connection, and a lingering handle keeps the temporary GeoPackage
    # locked during cleanup on Windows.
    db = connect(path.as_posix())
    try:
        db.execute(reset_last_change)
        db.commit()
    finally:
        db.close()
18 changes: 18 additions & 0 deletions tests/geopins/drivers/gdf/filetypes/test_gpkg.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from time import sleep
from typing import TYPE_CHECKING

import geopandas as gpd
Expand Down Expand Up @@ -27,3 +28,20 @@ def test_round_trip(tmp_geoboard: GeoBaseBoard):
# Assert
assert gdf.equals(retrieved)
assert gdf.crs == retrieved.crs


def test_hash_is_not_dependent_on_file_write_time(tmp_geoboard: GeoBaseBoard):
    """Writing the same GeoDataFrame twice, a second apart, gives equal pin hashes."""
    # Arrange
    points = gpd.points_from_xy([0, 1, 2], [0, 1, 2])
    gdf = gpd.GeoDataFrame(
        {"id": [1, 2, 3]},
        geometry=points,
        crs="EPSG:2193",  # NZGD2000 / New Zealand Transverse Mercator 2000
    )

    def write_pin():
        # Both writes use the same data, pin name, and file type.
        return tmp_geoboard.pin_write(gdf, name="test-gdf-hash", type="gpkg")

    # Act
    first_meta = write_pin()
    sleep(1)  # separate the two writes in wall-clock time
    second_meta = write_pin()

    # Assert
    assert first_meta.pin_hash == second_meta.pin_hash