Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions src/boundaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,18 @@

class BoundariesWriter(GeoParquetWriter):
COLUMNS = [
("boundary", pyarrow.string()),
("admin_level", pyarrow.string()),
("name", pyarrow.list_(pyarrow.string())),
("names", pyarrow.map_(pyarrow.string(), pyarrow.list_(pyarrow.string()))),
("official_name", pyarrow.list_(pyarrow.string())),
(
"official_names",
pyarrow.map_(pyarrow.string(), pyarrow.list_(pyarrow.string())),
),
("int_name", pyarrow.list_(pyarrow.string())),
("alt_name", pyarrow.list_(pyarrow.string())),
("alt_names", pyarrow.map_(pyarrow.string(), pyarrow.list_(pyarrow.string()))),
("type", pyarrow.string()),
("admin_level", pyarrow.string()),
("boundary", pyarrow.string()),
("int_name", pyarrow.list_(pyarrow.string())),
# Border type tagging is a fairly deep rabbit hole...
# We may consider harmonizing other tags like admin_type:XX
# in the future. See https://github.com/osmus/layercake/pull/18#discussion_r2347797702
Expand Down
13 changes: 8 additions & 5 deletions src/geoparquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ def __init__(
self.filename = filename
self.row_group_size = row_group_size

# Check for reserved column names
reserved_names = {"type", "id", "bbox", "geometry"}
column_names = {name for name, _ in self.COLUMNS}
conflicts = column_names & reserved_names
assert not conflicts, f"Column names {conflicts} in schema {self.__class__.__name__} conflict with reserved names"

# Create the schema
bbox_schema = pyarrow.struct(
[
Expand Down Expand Up @@ -88,10 +94,7 @@ def __init__(
[
("type", pyarrow.string()),
("id", pyarrow.int64()),
(
"tags",
pyarrow.struct(self.COLUMNS),
),
*self.COLUMNS,
("bbox", bbox_schema),
("geometry", pyarrow.binary()),
],
Expand Down Expand Up @@ -125,7 +128,7 @@ def append(self, type, id, attrs, wkb_hex):
bbox = dict(zip(["xmin", "ymin", "xmax", "ymax"], shapely.bounds(geom)))

self.chunk.append(
{"type": type, "id": id, "tags": attrs, "bbox": bbox, "geometry": wkb}
{"type": type, "id": id, **attrs, "bbox": bbox, "geometry": wkb}
)

if len(self.chunk) >= self.row_group_size:
Expand Down
10 changes: 6 additions & 4 deletions src/settlements.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from osmium.osm import TagList

from .geoparquet import GeoParquetWriter
from .helpers import tags_with_prefix
from .helpers import tags_with_prefix, split_multi_value_field


class SettlementsWriter(GeoParquetWriter):
Expand All @@ -15,11 +15,11 @@ class SettlementsWriter(GeoParquetWriter):

COLUMNS = [
("place", pyarrow.string()),
("name", pyarrow.string()),
("name", pyarrow.list_(pyarrow.string())),
("names", pyarrow.map_(pyarrow.string(), pyarrow.string())),
("alt_name", pyarrow.string()),
("alt_name", pyarrow.list_(pyarrow.string())),
("alt_names", pyarrow.map_(pyarrow.string(), pyarrow.string())),
("official_name", pyarrow.string()),
("official_name", pyarrow.list_(pyarrow.string())),
("official_names", pyarrow.map_(pyarrow.string(), pyarrow.string())),
("wikidata", pyarrow.string()),
("wikipedia", pyarrow.string()),
Expand Down Expand Up @@ -71,6 +71,8 @@ def column(column_name: str, tags: TagList):
return int(tags.get("population"))
except (TypeError, ValueError):
return None
case "name" | "alt_name" | "official_name":
return split_multi_value_field(tags.get(column_name))
case "names" | "alt_names" | "official_names":
return tags_with_prefix(f"{column_name[:-1]}:", tags)
case _:
Expand Down