Skip to content

Commit a5a84e3

Browse files
authored
feat: mock APIs for snapshot tests (#361)
* feat: update US tests to use snapshotted API return data * chore: update snapshots * feat: update Makefile for new testing structure * feat: add github workflow for regular validation of API * chore: update docs
1 parent f4fdadd commit a5a84e3

41 files changed

Lines changed: 49052 additions & 3372 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
name: API snapshot check
2+
3+
on:
4+
schedule:
5+
- cron: '0 16 * * 1' # Monday 08:00 PST (UTC-8)
6+
workflow_dispatch:
7+
8+
jobs:
9+
check-api-snapshot:
10+
runs-on: ubuntu-latest
11+
12+
steps:
13+
- name: Checkout
14+
uses: actions/checkout@v5
15+
16+
- name: Update and install Linux packages
17+
run: |
18+
sudo add-apt-repository ppa:ubuntugis/ubuntugis-unstable
19+
sudo apt-get update
20+
sudo apt-get install libgdal-dev gdal-bin python3-gdal
21+
22+
- name: Setup Python
23+
uses: actions/setup-python@v6
24+
with:
25+
python-version: '3.13.7'
26+
cache-dependency-path: |
27+
'requirements.txt'
28+
'requirements-dev.txt'
29+
30+
- name: Install uv
31+
uses: astral-sh/setup-uv@v6
32+
with:
33+
enable-cache: true
34+
cache-dependency-glob: 'requirements**.txt'
35+
36+
- name: Install Python dependencies
37+
run: |
38+
make install DC_ENV=ci
39+
make install_dev DC_ENV=ci
40+
41+
- name: Run API snapshot tests
42+
run: make test_api_snapshot

.github/workflows/build.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ jobs:
3232
cache-dependency-glob: 'requirements**.txt'
3333

3434
- name: Install Python dependencies
35-
run: make install-dev DC_ENV=ci
35+
run: make install_dev DC_ENV=ci
3636

3737
- name: Run lint checks
3838
run: make lint
@@ -68,7 +68,7 @@ jobs:
6868
- name: Install Python dependencies
6969
run: |
7070
make install DC_ENV=ci
71-
make install-dev DC_ENV=ci
71+
make install_dev DC_ENV=ci
7272
7373
- name: Restore data cache
7474
uses: actions/cache@v4
@@ -79,7 +79,7 @@ jobs:
7979

8080
- name: Download data
8181
if: ${{ hashFiles('Data/*') == '' }}
82-
run: make download-soil-data
82+
run: make download_soil_data
8383

8484
- name: Start soil id DB
8585
run: docker compose up -d

Makefile

Lines changed: 32 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@ endif
55
install:
66
uv pip install -r requirements.txt $(UV_FLAGS)
77

8-
install-dev:
8+
install_dev:
99
uv pip install -r requirements-dev.txt $(UV_FLAGS)
1010

11-
setup-git-hooks:
11+
setup_git_hooks:
1212
@pre-commit install
1313

1414
lint:
@@ -22,46 +22,54 @@ format:
2222
lock:
2323
CUSTOM_COMPILE_COMMAND="make lock" uv pip compile --upgrade --generate-hashes requirements/base.in -o requirements.txt
2424

25-
lock-package:
25+
lock_package:
2626
CUSTOM_COMPILE_COMMAND="make lock" uv pip compile --upgrade-package $(PACKAGE) --generate-hashes --emit-build-options requirements/base.in requirements/deploy.in -o requirements.txt
2727

28-
lock-dev:
28+
lock_dev:
2929
CUSTOM_COMPILE_COMMAND="make lock-dev" uv pip compile --upgrade --generate-hashes requirements/dev.in -o requirements-dev.txt
3030

31-
lock-dev-package:
31+
lock_dev_package:
3232
CUSTOM_COMPILE_COMMAND="make lock-dev" uv pip compile --upgrade-package $(PACKAGE) --generate-hashes requirements/dev.in -o requirements-dev.txt
3333

34-
build:
35-
echo "Building TK..."
36-
37-
check_rebuild:
38-
./scripts/rebuild.sh
39-
4034
clean:
4135
@find . -name *.pyc -delete
4236
@find . -name __pycache__ -delete
4337

44-
test: clean check_rebuild
38+
# run the standard test suite (unit + integration, no api_snapshots)
39+
test:
4540
if [ -z "$(PATTERN)" ]; then \
46-
$(DC_RUN_CMD) pytest soil_id -vv; \
41+
pytest soil_id -m "not api_snapshot"; \
4742
else \
48-
$(DC_RUN_CMD) pytest soil_id -vv -k "$(PATTERN)"; \
43+
pytest soil_id -m "not api_snapshot" -k "$(PATTERN)"; \
4944
fi
5045

51-
test_update_snapshots: clean check_rebuild
52-
if [ -z "$(PATTERN)" ]; then \
53-
$(DC_RUN_CMD) pytest soil_id --snapshot-update; \
54-
else \
55-
$(DC_RUN_CMD) pytest soil_id --snapshot-update -k "$(PATTERN)"; \
56-
fi
46+
# All tests except api_snapshot and integration (no live external APIs)
47+
test_unit:
48+
pytest soil_id -m "not api_snapshot and not integration"
49+
50+
# update the unit test snapshots (but not the API snapshots)
51+
# update the unit test snapshots (but not the API snapshots)
test_update_unit_snapshots:
	pytest soil_id -m "not api_snapshot and not integration" --snapshot-update
53+
54+
# Integration smoke tests only (full live API run, no output validation)
55+
test_integration:
56+
pytest soil_id -m integration
57+
58+
# API response snapshot tests only (compares live API responses to stored snapshots)
59+
test_api_snapshot:
60+
pytest soil_id -m api_snapshot
61+
62+
# Refresh stored API response snapshots from live APIs
63+
test_update_api_snapshots:
64+
pytest soil_id -m api_snapshot --snapshot-update
5765

58-
test-verbose:
66+
test_verbose:
5967
pytest soil_id --capture=no
6068

61-
test-profile:
69+
test_profile:
6270
pytest soil_id --profile
6371

64-
test-graphs: test-profile graphs
72+
# profile the test run, then render call graphs from the profile output
# NOTE: prerequisite renamed from `test-profile` to `test_profile` to match
# this commit's underscore naming convention; the old name no longer exists.
test_graphs: test_profile graphs
6573

6674
graphs:
6775
# gprof2dot -f pstats prof/combined.prof | dot -Tsvg -o prof/combined.svg
@@ -96,7 +104,7 @@ process_bulk_test_results_legacy:
96104
# 1P3xl1YRlfcMjfO_4PM39tkrrlL3hoLzv: gsmsoilmu_a_us.prj
97105
# 1K0GkqxhZiVUND6yfFmaI7tYanLktekyp: gsmsoilmu_a_us.dbf
98106
# 1z7foFFHv_mTsuxMYnfOQRvXT5LKYlYFN: SoilID_US_Areas.shz
99-
download-soil-data:
107+
download_soil_data:
100108
mkdir -p Data
101109
cd Data; \
102110
gdown 1tN23iVe6X1fcomcfveVp4w3Pwd0HJuTe; \

README.md

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,42 +2,42 @@
22

33
## Requirements
44

5-
- Python: 3.12 or better
5+
- Python: 3.12 or better
66

77
# Contributing
88

99
Configure git to automatically lint your code and validate your commit messages:
1010

1111
```sh
12-
$ make setup-git-hooks
12+
$ make setup_git_hooks
1313
```
1414

1515
Set up a virtual environment and install dependencies:
1616

1717
```sh
1818
$ uv venv
1919
$ source .venv/bin/activate
20-
$ make install && make install-dev
20+
$ make install && make install_dev
2121
```
2222

2323
## explanation of algorithm
2424

2525
### terminology
2626

27-
- soil map unit: (possibly disjoint) geographic area that is associated with soil component percentage / arial coverage
28-
- soil series: collection of related soil components
29-
- soil component: description of various soil properties at specific depth intervals
27+
- soil map unit: (possibly disjoint) geographic area that is associated with soil component percentage / areal coverage
28+
- soil series: collection of related soil components
29+
- soil component: description of various soil properties at specific depth intervals
3030

3131
### references
3232

33-
- equation 1 in https://landpotential.org/wp-content/uploads/2020/07/sssaj-0-0-sssaj2017.09.0337.pdf
33+
- equation 1 in https://landpotential.org/wp-content/uploads/2020/07/sssaj-0-0-sssaj2017.09.0337.pdf
3434

3535
### dependencies
3636

37-
- simple features: https://r-spatial.github.io/sf/index.html
38-
- well-known geometry: https://paleolimbot.github.io/wk/
39-
- R package for querying soilDB: https://ncss-tech.github.io/soilDB/
40-
- dplyr: https://dplyr.tidyverse.org/
37+
- simple features: https://r-spatial.github.io/sf/index.html
38+
- well-known geometry: https://paleolimbot.github.io/wk/
39+
- R package for querying soilDB: https://ncss-tech.github.io/soilDB/
40+
- dplyr: https://dplyr.tidyverse.org/
4141

4242
### algorithm
4343

@@ -72,19 +72,27 @@ Input: a specific point in lat/lon, and a set of depth intervals.
7272

7373
### Regular tests
7474

75-
There is a small suite of integration tests which can be run with the `make test` command, and gets run regularly by CI.
75+
There are several smaller test suites:
76+
77+
- There is a set of "unit" tests, which really are testing the entire codebase more or less, but don't rely on any external API services, instead using snapshotted data from those services. You can run these tests with `make test_unit`.
78+
- These tests mostly produce snapshots of algorithm output rather than validating specific properties of the output, so they validate that the algorithm hasn't changed (or how it has changed) rather than that it is correct. If the snapshots have changed in a desirable way, you can update them with `make test_update_unit_snapshots`.
79+
- For US only, there is a set of "integration" tests which run the algorithm against the live API services, but just confirm that the algorithm doesn't crash, they don't validate the output since it can change over time. These can be run with `make test_integration`.
80+
- The unit and integration tests can be run together with `make test` for convenience: this is what must pass for a PR to be mergeable.
81+
- The API snapshots themselves can be checked against the live API for drift using `make test_api_snapshot`. They can be updated to the new live API values using `make test_update_api_snapshots`.
7682

7783
### Bulk test
7884

7985
There is a large suite of integration tests which takes many hours to run. It comes in the format of two scripts:
8086

81-
- Run `make generate_bulk_test_results` to run the algorithm over a collection of 3000 soil pits, which will accumulate the results in a log file.
82-
- Run `RESULTS_FILE=$RESULTS_FILE make process_bulk_test_results` to view statistics calculated over that log file.
87+
- Run `make generate_bulk_test_results_us` or `make generate_bulk_test_results_global` to run the algorithm over a collection of thousands of soil pits with soil IDs given by trained data collectors, which will accumulate the results in a log file. This can take several hours or potentially need to run overnight (the US tests especially are slow due to the speed of the external API services).
88+
- Run `RESULTS_FILE=$RESULTS_FILE make process_bulk_test_results_us` or `RESULTS_FILE=$RESULTS_FILE make process_bulk_test_results_global` to view statistics calculated over that log file. This can be run concurrently with `generate_bulk_test_results` to see statistics over the soil pits which have been run so far.
89+
- It has been nice to have these as two separate scripts because then you can iterate on the processing and display of statistics without interrupting the data collection.
90+
- It would be of value to also be able to run these US tests against snapshotted API data, it would just be much more onerous to collect and update the data.
8391

8492
## Acknowledgements
8593

86-
- Beaudette, D., Roudier, P., Brown, A. (2023). [aqp: Algorithms for Quantitative Pedology](https://CRAN.R-project.org/package=aqp). R package version 2.0.
94+
- Beaudette, D., Roudier, P., Brown, A. (2023). [aqp: Algorithms for Quantitative Pedology](https://CRAN.R-project.org/package=aqp). R package version 2.0.
8795

88-
- Beaudette, D.E., Roudier, P., O'Geen, A.T. [Algorithms for quantitative pedology: A toolkit for soil scientists, Computers & Geosciences](http://dx.doi.org/10.1016/j.cageo.2012.10.020), Volume 52, March 2013, Pages 258-268, ISSN 0098-3004.
96+
- Beaudette, D.E., Roudier, P., O'Geen, A.T. [Algorithms for quantitative pedology: A toolkit for soil scientists, Computers & Geosciences](http://dx.doi.org/10.1016/j.cageo.2012.10.020), Volume 52, March 2013, Pages 258-268, ISSN 0098-3004.
8997

90-
- soilDB: Beaudette, D., Skovlin, J., Roecker, S., Brown, A. (2024). [soilDB: Soil Database Interface](https://CRAN.R-project.org/package=soilDB). R package version 2.8.3.
98+
- soilDB: Beaudette, D., Skovlin, J., Roecker, S., Brown, A. (2024). [soilDB: Soil Database Interface](https://CRAN.R-project.org/package=soilDB). R package version 2.8.3.

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ extend-ignore = ["E203"]
2424
[tool.pytest.ini_options]
2525
log_cli = true
2626
log_cli_level = "INFO"
27+
markers = [
28+
"integration: runs against live external APIs; does not validate output",
29+
"api_snapshot: captures and compares API response snapshots against live APIs",
30+
]
2731

2832
[tool.setuptools.dynamic]
2933
dependencies = { file = ["requirements/base.in"] }

0 commit comments

Comments
 (0)