Skip to content

Commit ffdda73

Browse files
Merge pull request SeequentEvo#136 from BenLewis-Seequent/download-attribute
Support downloading category data and attributes with DownloadedObject
2 parents 3a2e130 + ab2f58b commit ffdda73

11 files changed

Lines changed: 679 additions & 40 deletions

File tree

CHANGELOG.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,26 @@
44
### What's changed
55
#### evo-sdk
66
* Add DownloadedObject.update method to conveniently update an existing geoscience object by @BenLewis-Seequent in https://github.com/SeequentEvo/evo-python-sdk/pull/134
7+
* Add methods on DownloadedObject to download category data and attributes by @BenLewis-Seequent in https://github.com/SeequentEvo/evo-python-sdk/pull/124
8+
* Add MultiDownloadRequestHandler to test_tools, to be able to mock multiple downloads at the same time by @BenLewis-Seequent in https://github.com/SeequentEvo/evo-python-sdk/pull/124
79

810
**Full changelog**: https://github.com/SeequentEvo/evo-python-sdk/compare/evo-sdk@v0.1.12...evo-sdk@v0.1.13
911

1012
## evo-objects@v0.3.2
1113
### What's changed
1214
#### evo-objects
1315
* Add DownloadedObject.update method to conveniently update an existing geoscience object by @BenLewis-Seequent in https://github.com/SeequentEvo/evo-python-sdk/pull/134
16+
* Add methods on DownloadedObject to download category data and attributes by @BenLewis-Seequent in https://github.com/SeequentEvo/evo-python-sdk/pull/124
1417

1518
**Full changelog**: https://github.com/SeequentEvo/evo-python-sdk/compare/evo-objects@v0.3.1...evo-objects@v0.3.2
1619

20+
## evo-sdk-common@v0.5.8
21+
### What's changed
22+
#### evo-sdk-common
23+
* Add MultiDownloadRequestHandler to test_tools, to be able to mock multiple downloads at the same time by @BenLewis-Seequent in https://github.com/SeequentEvo/evo-python-sdk/pull/124
24+
25+
**Full changelog**: https://github.com/SeequentEvo/evo-python-sdk/compare/evo-sdk-common@v0.5.7...evo-sdk-common@v0.5.8
26+
1727
## evo-sdk@v0.1.12
1828
### What's changed
1929
#### evo-sdk

packages/evo-objects/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ authors = [
1010
]
1111

1212
dependencies = [
13-
"evo-sdk-common[jmespath]>=0.5.4",
13+
"evo-sdk-common[jmespath]>=0.5.8",
1414
"pydantic>=2,<3",
1515
]
1616

packages/evo-objects/src/evo/objects/client/object_client.py

Lines changed: 254 additions & 26 deletions
Large diffs are not rendered by default.

packages/evo-objects/src/evo/objects/parquet/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,12 @@
1515
raise ImportError("The 'pyarrow' package is required to use ParquetLoader") from None
1616

1717
from .loader import ParquetDownloader, ParquetLoader
18-
from .types import ArrayTableInfo, LookupTableInfo, TableInfo
18+
from .types import ArrayTableInfo, AttributeInfo, CategoryInfo, LookupTableInfo, TableInfo
1919

2020
__all__ = [
2121
"ArrayTableInfo",
22+
"AttributeInfo",
23+
"CategoryInfo",
2224
"LookupTableInfo",
2325
"ParquetDownloader",
2426
"ParquetLoader",

packages/evo-objects/src/evo/objects/parquet/types.py

Lines changed: 59 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,14 @@
1010
# limitations under the License.
1111

1212
import sys
13-
from typing import TypeAlias
13+
from typing import Generic, TypeAlias, TypeVar
1414

1515
if sys.version_info >= (3, 12):
1616
from typing import NotRequired, TypedDict
1717
else:
1818
from typing_extensions import NotRequired, TypedDict
1919

20-
__all__ = [
21-
"ArrayTableInfo",
22-
"LookupTableInfo",
23-
"TableInfo",
24-
]
20+
__all__ = ["ArrayTableInfo", "AttributeInfo", "CategoryInfo", "LookupTableInfo", "TableInfo"]
2521

2622

2723
class _BaseTableInfo(TypedDict):
@@ -30,13 +26,70 @@ class _BaseTableInfo(TypedDict):
3026

3127

3228
class ArrayTableInfo(_BaseTableInfo):
29+
"""Metadata for a non-lookup table.
30+
31+
The 'data' field contains the reference to the blob where the table data is stored.
32+
33+
The 'length', 'width', and 'data_type' fields describe the structure of the table.
34+
"""
35+
3336
data_type: str
3437
width: NotRequired[int]
3538

3639

3740
class LookupTableInfo(_BaseTableInfo):
41+
"""Metadata for a lookup table, which is used to define categories.
42+
43+
The 'data' field contains the reference to the blob where the table data is stored.
44+
45+
The 'length', 'keys_data_type', and 'values_data_type' fields describe the structure of the table.
46+
"""
47+
3848
keys_data_type: str
3949
values_data_type: str
4050

4151

4252
TableInfo: TypeAlias = ArrayTableInfo | LookupTableInfo
53+
54+
55+
class CategoryInfo(TypedDict):
56+
"""Metadata for category tables.
57+
58+
In Geoscience Object Schemas, categories are defined by an indices array ('values') and a lookup table ('table').
59+
"""
60+
61+
table: LookupTableInfo
62+
values: TableInfo
63+
64+
65+
T = TypeVar("T")
66+
67+
68+
class _Nan(TypedDict, Generic[T]):
69+
values: list[T]
70+
71+
72+
class NanCategorical(_Nan[int]):
73+
"""Metadata for representing 'not a number' (NaN) values in categorical/integer attributes.
74+
75+
In addition to supporting null values within certain tables, additional values can be defined,
76+
which should be interpreted as 'not a number' (NaN).
77+
"""
78+
79+
80+
class NanContinuous(_Nan[float]):
81+
"""Metadata for representing 'not a number' (NaN) values in continuous attributes.
82+
83+
In addition to supporting null values within certain tables, additional values can be defined,
84+
which should be interpreted as 'not a number' (NaN).
85+
"""
86+
87+
88+
class AttributeInfo(TypedDict):
89+
"""Metadata for attributes."""
90+
91+
name: str
92+
key: NotRequired[str]
93+
nan_description: NotRequired[NanCategorical | NanContinuous]
94+
values: ArrayTableInfo
95+
table: NotRequired[LookupTableInfo]

packages/evo-objects/tests/helpers.py

Lines changed: 119 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
from collections.abc import Iterator
1515
from datetime import datetime, timezone
1616
from io import BytesIO
17+
from typing import Any
1718

19+
import jmespath
1820
import numpy
1921
import pyarrow as pa
2022
import pyarrow.parquet as pq
@@ -162,7 +164,122 @@ def get_sample_table(
162164

163165

164166
def get_sample_table_and_bytes(table_format: BaseTableFormat, n_rows: int) -> tuple[pa.Table, bytes]:
165-
memory = BytesIO()
166167
table = get_sample_table(table_format, n_rows)
168+
return table, write_table_to_bytes(table)
169+
170+
171+
def write_table_to_bytes(table: pa.Table) -> bytes:
172+
memory = BytesIO()
167173
pq.write_table(table, where=memory, version="2.4", compression="gzip")
168-
return table, memory.getvalue()
174+
return memory.getvalue()
175+
176+
177+
# Support for assignment operations using JMESPath expressions.
178+
# Could be moved to evo.jmespath in the future, if we want to expose this functionality outside of tests.
179+
class _AssignmentTargetDictEntry:
180+
"""Represents a dictionary entry that potentially can be assigned to."""
181+
182+
def __init__(self, key: str, obj: dict):
183+
self.key = key
184+
self.obj = obj
185+
186+
@property
187+
def value(self) -> Any:
188+
"""Get the value at this dictionary entry, creating an empty dict if it doesn't exist."""
189+
return self.obj.setdefault(self.key, {})
190+
191+
192+
class _AssignmentTargetListEntry:
193+
"""Represents a list entry that potentially can be assigned to."""
194+
195+
def __init__(self, index: int, obj: list):
196+
self.index = index
197+
self.obj = obj
198+
199+
@property
200+
def value(self) -> Any:
201+
"""Get the value at this list entry, or None if the index is out of range."""
202+
try:
203+
return self.obj[self.index]
204+
except IndexError:
205+
return None
206+
207+
208+
class _AssignInterpreter(jmespath.visitor.Visitor):
209+
"""A JMESPath visitor used for processing assignment operations.
210+
211+
This only supports a subset of JMESPath expressions that can be used for assignment.
212+
213+
This works by lazily evaluating field and index accesses, so that the last operation can be turned into an
214+
assignment. If another operation is encountered after a field or index access, the value is evaluated at that
215+
point.
216+
"""
217+
218+
def default_visit(self, node, *args, **kwargs):
219+
raise NotImplementedError(node["type"])
220+
221+
@staticmethod
222+
def _evaluate_value(value):
223+
"""Lazily evaluate the value if it's an assignment target."""
224+
if isinstance(value, (_AssignmentTargetDictEntry, _AssignmentTargetListEntry)):
225+
return value.value
226+
else:
227+
return value
228+
229+
def visit_field(self, node, value):
230+
"""Visit a field access node, i.e. foo.bar."""
231+
evaluated_value = self._evaluate_value(value)
232+
if not isinstance(evaluated_value, dict):
233+
return None
234+
return _AssignmentTargetDictEntry(node["value"], evaluated_value)
235+
236+
def visit_index(self, node, value):
237+
"""Visit an index access node, i.e. foo[0]."""
238+
evaluated_value = self._evaluate_value(value)
239+
if not isinstance(evaluated_value, list):
240+
return None
241+
return _AssignmentTargetListEntry(node["value"], evaluated_value)
242+
243+
def _visit_sub_or_index_expression(self, node, value):
244+
"""Visit a subexpression or index expression node, i.e. foo.bar.baz or a[0][1]."""
245+
result = value
246+
for node in node["children"]:
247+
result = self.visit(node, result)
248+
return result
249+
250+
visit_subexpression = _visit_sub_or_index_expression
251+
visit_index_expression = _visit_sub_or_index_expression
252+
253+
254+
def assign_property(obj: dict, expression: str, value: Any) -> None:
255+
"""Assign a value to a property in a dictionary using a JMESPath expression.
256+
257+
This only supports a subset of JMESPath expressions that can be used for assignment. In particular, only the following
258+
expression types are supported:
259+
- Field accesses (e.g. foo.bar)
260+
- Index accesses (e.g. foo[0])
261+
- Subexpressions combining the above (e.g. foo.bar[0].baz)
262+
If the expression is not in that form, a NotImplementedError will be raised (see _AssignInterpreter.default_visit).
263+
264+
Also, if the expression attempts to perform an invalid operation like:
265+
- Accessing a field on a non-object
266+
- Accessing an index on a non-array
267+
- Accessing an out-of-bounds index on an array
268+
then a TypeError will be raised (or an IndexError if the final index of the expression is out of bounds).
269+
270+
Accessing a non-existent field on an object will create an empty object at that field to allow for nested assignments.
271+
272+
:param obj: The dictionary to assign the property to.
273+
:param expression: The JMESPath expression representing the property to assign to.
274+
:param value: The value to assign to the property.
275+
"""
276+
parsed_expression = jmespath.compile(expression)
277+
interpreter = _AssignInterpreter()
278+
target = interpreter.visit(parsed_expression.parsed, obj)
279+
280+
if isinstance(target, _AssignmentTargetDictEntry):
281+
target.obj[target.key] = value
282+
elif isinstance(target, _AssignmentTargetListEntry):
283+
target.obj[target.index] = value
284+
else:
285+
raise TypeError(f"Cannot assign to expression '{expression}'")

0 commit comments

Comments
 (0)