Skip to content

Commit 2afe75c

Browse files
Dev 12 25 (#18)
* read of wellboreFrameRepresentation * new storage interface * keep h5 open for efficiency * epc stream improvements.
1 parent ed61eaf commit 2afe75c

29 files changed

Lines changed: 5325 additions & 861 deletions

.github/workflows/ci_energyml_utils_pull_request.yml

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
## SPDX-License-Identifier: Apache-2.0
44
##
55
---
6-
name: Publish (pypiTest)
6+
name: Test/Build/Publish (pypiTest)
77

88
defaults:
99
run:
@@ -18,8 +18,31 @@ on:
1818
types: [published]
1919

2020
jobs:
21+
test:
22+
name: Run tests
23+
runs-on: ubuntu-latest
24+
steps:
25+
- name: Checkout code
26+
uses: actions/checkout@v4
27+
with:
28+
fetch-depth: 0
29+
30+
- name: Install poetry
31+
uses: ./.github/actions/prepare-poetry
32+
with:
33+
python-version: "3.10"
34+
35+
- name: Install dependencies
36+
run: |
37+
poetry install
38+
39+
- name: Run pytest
40+
run: |
41+
poetry run pytest -v --tb=short
42+
2143
build:
2244
name: Build distribution
45+
needs: [test]
2346
runs-on: ubuntu-latest
2447
steps:
2548
- name: Checkout code

energyml-utils/.flake8

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[flake8]
22
# Ignore specific error codes (comma-separated list)
3-
ignore = E501, E722, W503, F403, E203, E202
3+
ignore = E501, E722, W503, F403, E203, E202, E402
44

55
# Max line length (default is 79, can be changed)
66
max-line-length = 120

energyml-utils/.gitignore

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ sample/
4444
gen*/
4545
manip*
4646
*.epc
47+
*.h5
4748
*.off
4849
*.obj
4950
*.log
@@ -54,8 +55,16 @@ manip*
5455

5556
*.xml
5657
*.json
58+
docs/*.md
59+
60+
# DATA
61+
*.obj
62+
*.geojson
63+
*.vtk
64+
*.stl
5765

5866

5967
# WIP
6068
src/energyml/utils/wip*
61-
scripts
69+
scripts
70+
rc/camunda
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
#!/usr/bin/env python
2+
# Copyright (c) 2023-2024 Geosiris.
3+
# SPDX-License-Identifier: Apache-2.0
4+
"""
5+
Example demonstrating the keep_open feature of EpcStreamReader.
6+
7+
This example shows how using keep_open=True improves performance when
8+
performing multiple operations on an EPC file by keeping the ZIP file
9+
open instead of reopening it for each operation.
10+
"""
11+
12+
import time
13+
import sys
14+
from pathlib import Path
15+
16+
# Add src directory to path
17+
src_path = Path(__file__).parent.parent / "src"
18+
sys.path.insert(0, str(src_path))
19+
20+
from energyml.utils.epc_stream import EpcStreamReader
21+
22+
23+
def benchmark_without_keep_open(epc_path: str, num_operations: int = 10):
    """Benchmark reading objects without keep_open.

    Opens ``epc_path`` with ``keep_open=False`` and reads at most
    ``num_operations`` objects, bounded by the number of objects the file
    lists. The measured time includes creating the reader and listing the
    object metadata.

    Args:
        epc_path: Path of the EPC file to read.
        num_operations: Maximum number of objects to read.

    Returns:
        Elapsed time in seconds, or ``0`` when the file lists no objects.
    """
    print(f"\nBenchmark WITHOUT keep_open ({num_operations} operations):")
    print("=" * 60)

    # perf_counter() is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted, which distorts short measurements.
    start = time.perf_counter()
    reads_done = 0

    # Create reader without keep_open
    with EpcStreamReader(epc_path, keep_open=False, cache_size=5) as reader:
        metadata_list = reader.list_object_metadata()

        if not metadata_list:
            print(" No objects in EPC file")
            return 0

        # Never read more objects than the file actually contains.
        for i in range(min(num_operations, len(metadata_list))):
            meta = metadata_list[i]
            if meta.identifier:
                _ = reader.get_object_by_identifier(meta.identifier)
                reads_done += 1
                if i == 0:
                    print(f" First object: {meta.object_type}")

    elapsed = time.perf_counter() - start
    print(f" Time: {elapsed:.4f}s")
    # Average over the reads that really happened: the file may hold fewer
    # objects than requested and entries without an identifier are skipped.
    # Guarding on zero also avoids a ZeroDivisionError.
    if reads_done:
        print(f" Avg per operation: {elapsed / reads_done:.4f}s")
    else:
        print(" Avg per operation: n/a (no objects read)")

    return elapsed
51+
52+
53+
def benchmark_with_keep_open(epc_path: str, num_operations: int = 10):
    """Benchmark reading objects with keep_open.

    Opens ``epc_path`` with ``keep_open=True`` and reads at most
    ``num_operations`` objects, bounded by the number of objects the file
    lists. The measured time includes creating the reader and listing the
    object metadata.

    Args:
        epc_path: Path of the EPC file to read.
        num_operations: Maximum number of objects to read.

    Returns:
        Elapsed time in seconds, or ``0`` when the file lists no objects.
    """
    print(f"\nBenchmark WITH keep_open ({num_operations} operations):")
    print("=" * 60)

    # perf_counter() is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted, which distorts short measurements.
    start = time.perf_counter()
    reads_done = 0

    # Create reader with keep_open
    with EpcStreamReader(epc_path, keep_open=True, cache_size=5) as reader:
        metadata_list = reader.list_object_metadata()

        if not metadata_list:
            print(" No objects in EPC file")
            return 0

        # Never read more objects than the file actually contains.
        for i in range(min(num_operations, len(metadata_list))):
            meta = metadata_list[i]
            if meta.identifier:
                _ = reader.get_object_by_identifier(meta.identifier)
                reads_done += 1
                if i == 0:
                    print(f" First object: {meta.object_type}")

    elapsed = time.perf_counter() - start
    print(f" Time: {elapsed:.4f}s")
    # Average over the reads that really happened: the file may hold fewer
    # objects than requested and entries without an identifier are skipped.
    # Guarding on zero also avoids a ZeroDivisionError.
    if reads_done:
        print(f" Avg per operation: {elapsed / reads_done:.4f}s")
    else:
        print(" Avg per operation: n/a (no objects read)")

    return elapsed
81+
82+
83+
def demonstrate_file_modification_with_keep_open(epc_path: str):
    """Show that an object can be updated and read back on a kept-open reader.

    Reads the first listed object of ``epc_path``, passes it back through
    ``update_object``, reads it again by the returned identifier, and reports
    whether the number of listed objects stayed the same.

    Args:
        epc_path: Path of the EPC file to open with ``keep_open=True``.

    Raises:
        AssertionError: If the object cannot be read back after the update.
    """
    separator = "=" * 60
    print("\nDemonstrating file modifications with keep_open:")
    print(separator)

    with EpcStreamReader(epc_path, keep_open=True) as reader:
        objects_before = reader.list_object_metadata()
        original_count = len(objects_before)
        print(f" Original object count: {original_count}")

        # Nothing to update when the file lists no objects.
        if not objects_before:
            return

        # Fetch the first listed object.
        first_meta = objects_before[0]
        first_obj = reader.get_object_by_identifier(first_meta.identifier)
        print(f" Retrieved object: {first_meta.object_type}")

        # Write the same object back (re-add it).
        identifier = reader.update_object(first_obj)
        print(f" Updated object: {identifier}")

        # The object must still be readable through the same reader.
        updated_obj = reader.get_object_by_identifier(identifier)
        assert updated_obj is not None, "Failed to read object after update"
        print(" ✓ Object successfully read after update")

        # An update must not add or remove entries.
        new_count = len(reader.list_object_metadata())
        print(f" New object count: {new_count}")

        count_report = (
            " ✓ Object count unchanged (correct)"
            if new_count == original_count
            else f" ✗ Object count changed: {original_count} -> {new_count}"
        )
        print(count_report)
116+
117+
118+
def demonstrate_proper_cleanup():
    """Demonstrate that persistent ZIP file is properly closed.

    Exercises three ways of releasing an ``EpcStreamReader`` created with
    ``keep_open=True``: an explicit ``close()``, dropping the reference with
    ``del``, and leaving a ``with`` block.

    The scratch EPC path lives inside a private temporary directory, so the
    demo can never operate on, or delete, a ``temp_test.epc`` that already
    exists in the current working directory.
    """
    # Local import: only this demonstration needs it.
    import tempfile

    print("\nDemonstrating proper cleanup:")
    print("=" * 60)

    # The directory and anything created in it are removed when the block
    # exits, including when one of the steps below raises.
    with tempfile.TemporaryDirectory() as scratch_dir:
        # NOTE(review): the path does not exist yet; this assumes
        # EpcStreamReader accepts such a path, as the demo always did.
        temp_path = str(Path(scratch_dir) / "temp_test.epc")

        # Create a temporary EPC file
        reader = EpcStreamReader(temp_path, keep_open=True)
        print(" Created EpcStreamReader with keep_open=True")

        # Manually close
        reader.close()
        print(" ✓ Manually closed reader")

        # Create another reader and drop the only reference to it
        reader2 = EpcStreamReader(temp_path, keep_open=True)
        print(" Created second EpcStreamReader")
        del reader2
        print(" ✓ Reader deleted (automatic cleanup via __del__)")

        # Create reader in context manager
        with EpcStreamReader(temp_path, keep_open=True) as _:
            print(" Created third EpcStreamReader in context manager")
        print(" ✓ Context manager exited (automatic cleanup)")
149+
150+
151+
def main():
    """Run every demonstration against the configured EPC file.

    When the EPC file is missing, only the cleanup demonstration runs.
    Otherwise both benchmarks run, their timings are compared, and the
    modification and cleanup demonstrations follow. Any exception raised by
    those steps is printed with its traceback instead of propagating.
    """
    rule = "=" * 60
    print("EpcStreamReader keep_open Feature Demonstration")
    print(rule)

    # You'll need to provide a valid EPC file path
    epc_path = "wip/epc_test.epc"

    if not Path(epc_path).exists():
        print(f"\nError: EPC file not found: {epc_path}")
        print("Please provide a valid EPC file path in the script.")
        print("\nRunning cleanup demonstration only:")
        demonstrate_proper_cleanup()
        return

    try:
        # Benchmarks
        num_ops = 20
        time_without = benchmark_without_keep_open(epc_path, num_ops)
        time_with = benchmark_with_keep_open(epc_path, num_ops)

        # Comparison
        print(f"\n{rule}")
        print("Performance Comparison:")
        print(rule)
        # A benchmark returns 0 when the file has no objects; skip the ratio then.
        both_measured = time_with > 0 and time_without > 0
        if both_measured:
            speedup = time_without / time_with
            improvement = ((time_without - time_with) / time_without) * 100
            print(f" Speedup: {speedup:.2f}x")
            print(f" Improvement: {improvement:.1f}%")

            if speedup > 1.1:
                verdict_lines = ["\n ✓ keep_open=True significantly improves performance!"]
            elif speedup > 1.0:
                verdict_lines = ["\n ✓ keep_open=True slightly improves performance"]
            else:
                verdict_lines = [
                    "\n Note: For this workload, the difference is minimal",
                    " (cache effects or small file)",
                ]
            for verdict_line in verdict_lines:
                print(verdict_line)

        # Modifications, then cleanup
        demonstrate_file_modification_with_keep_open(epc_path)
        demonstrate_proper_cleanup()

        print(f"\n{rule}")
        print("All demonstrations completed successfully!")
        print(rule)

    except Exception as e:
        print(f"\nError: {e}")
        import traceback

        traceback.print_exc()


# Run the demonstrations only when the file is executed as a script.
if __name__ == "__main__":
    main()

energyml-utils/example/main.py

Lines changed: 72 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,27 @@
11
# Copyright (c) 2023-2024 Geosiris.
22
# SPDX-License-Identifier: Apache-2.0
33
import sys
4+
import logging
45
from pathlib import Path
56
import re
67
from dataclasses import fields
78

9+
from energyml.utils.constants import (
10+
RGX_CONTENT_TYPE,
11+
EpcExportVersion,
12+
date_to_epoch,
13+
epoch,
14+
epoch_to_date,
15+
gen_uuid,
16+
get_domain_version_from_content_or_qualified_type,
17+
parse_content_or_qualified_type,
18+
parse_content_type,
19+
)
20+
821
src_path = Path(__file__).parent.parent / "src"
922
sys.path.insert(0, str(src_path))
1023

11-
from energyml.eml.v2_3.commonv2 import *
24+
from energyml.eml.v2_3.commonv2 import Citation, DataObjectReference, ExistenceKind, Activity
1225
from energyml.eml.v2_3.commonv2 import AbstractObject
1326
from energyml.resqml.v2_0_1.resqmlv2 import DoubleHdf5Array
1427
from energyml.resqml.v2_0_1.resqmlv2 import TriangulatedSetRepresentation as Tr20
@@ -22,17 +35,70 @@
2235

2336
# from src.energyml.utils.data.hdf import *
2437
from energyml.utils.data.helper import get_projected_uom, is_z_reversed
25-
from energyml.utils.epc import *
26-
from energyml.utils.introspection import *
27-
from energyml.utils.manager import *
28-
from energyml.utils.serialization import *
38+
from energyml.utils.epc import (
39+
Epc,
40+
EPCRelsRelationshipType,
41+
as_dor,
42+
create_energyml_object,
43+
create_external_part_reference,
44+
gen_energyml_object_path,
45+
get_reverse_dor_list,
46+
)
47+
from energyml.utils.introspection import (
48+
class_match_rgx,
49+
copy_attributes,
50+
get_class_attributes,
51+
get_class_fields,
52+
get_class_from_content_type,
53+
get_class_from_name,
54+
get_class_from_qualified_type,
55+
get_class_methods,
56+
get_content_type_from_class,
57+
get_obj_pkg_pkgv_type_uuid_version,
58+
get_obj_uri,
59+
get_object_attribute,
60+
get_obj_uuid,
61+
get_object_attribute_rgx,
62+
get_qualified_type_from_class,
63+
is_abstract,
64+
is_primitive,
65+
random_value_from_class,
66+
search_attribute_matching_name,
67+
search_attribute_matching_name_with_path,
68+
search_attribute_matching_type,
69+
search_attribute_matching_type_with_path,
70+
)
71+
from energyml.utils.manager import (
72+
# create_energyml_object,
73+
# create_external_part_reference,
74+
dict_energyml_modules,
75+
get_class_pkg,
76+
get_class_pkg_version,
77+
get_classes_matching_name,
78+
get_sub_classes,
79+
list_energyml_modules,
80+
)
81+
from energyml.utils.serialization import (
82+
read_energyml_xml_file,
83+
read_energyml_xml_str,
84+
serialize_json,
85+
JSON_VERSION,
86+
serialize_xml,
87+
)
2988
from energyml.utils.validation import (
3089
patterns_validation,
3190
dor_validation,
3291
validate_epc,
3392
correct_dor,
3493
)
35-
from energyml.utils.xml import *
94+
from energyml.utils.xml import (
95+
find_schema_version_in_element,
96+
get_class_name_from_xml,
97+
get_root_namespace,
98+
get_root_type,
99+
get_tree,
100+
get_xml_encoding,
101+
)
36102
from energyml.utils.data.datasets_io import HDF5FileReader, get_path_in_external_with_path
37103

38104
fi_cit = Citation(

0 commit comments

Comments
 (0)