Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions src/mavedb/lib/score_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,18 +238,26 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
score_sets: list[ScoreSet] = (
query.join(ScoreSet.experiment)
.options(
# Use selectinload for one-to-many experiment relationships to avoid row
# multiplication in the main query. joinedload would LEFT OUTER JOIN these
# into the main SQL query, and because they're nested inside contains_eager,
# SQLAlchemy's subquery-wrapping logic doesn't protect the LIMIT clause from
# being applied to multiplied rows rather than unique score sets. This would
# cause the count of returned score sets to be less than the requested limit,
# and the count query would be triggered even when the number of unique score
# sets in the main query results exceeds the limit.
contains_eager(ScoreSet.experiment).options(
joinedload(Experiment.experiment_set),
joinedload(Experiment.keyword_objs).joinedload(
selectinload(Experiment.keyword_objs).joinedload(
ExperimentControlledKeywordAssociation.controlled_keyword
),
joinedload(Experiment.created_by),
joinedload(Experiment.modified_by),
joinedload(Experiment.doi_identifiers),
joinedload(Experiment.publication_identifier_associations).joinedload(
selectinload(Experiment.doi_identifiers),
selectinload(Experiment.publication_identifier_associations).joinedload(
ExperimentPublicationIdentifierAssociation.publication
),
joinedload(Experiment.raw_read_identifiers),
selectinload(Experiment.raw_read_identifiers),
selectinload(Experiment.score_sets).options(
joinedload(ScoreSet.doi_identifiers),
joinedload(ScoreSet.publication_identifier_associations).joinedload(
Expand Down Expand Up @@ -292,7 +300,7 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
# query.
score_sets = score_sets[: search.limit]
count_query = db.query(ScoreSet)
build_search_score_sets_query_filter(db, count_query, owner_or_contributor, search)
count_query = build_search_score_sets_query_filter(db, count_query, owner_or_contributor, search)
num_score_sets = count_query.order_by(None).limit(None).count()

save_to_logging_context({"matching_resources": num_score_sets})
Expand Down
46 changes: 46 additions & 0 deletions tests/routers/test_score_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED,
TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED,
TEST_CROSSREF_IDENTIFIER,
TEST_EXPERIMENT_WITH_KEYWORD,
TEST_GNOMAD_DATA_VERSION,
TEST_INACTIVE_LICENSE,
TEST_MAPPED_VARIANT_WITH_HGVS_G_EXPRESSION,
Expand Down Expand Up @@ -2403,6 +2404,51 @@ def test_search_filter_options_hidden_by_published_superseding_version(
assert target_name not in target_names


def test_search_score_sets_reports_correct_total_count_with_limit(
    session, data_provider, client, setup_router_db, data_files
):
    """numScoreSets must report the true total of matching score sets, not the page size.

    Publishes more score sets than the requested search limit, then verifies that the
    search endpoint returns exactly `limit` results while still counting every match.
    """
    total_published = 3
    for idx in range(total_published):
        experiment = create_experiment(client, {"title": f"Experiment {idx}"})
        score_set = create_seq_score_set(client, experiment["urn"], update={"title": f"Score Set {idx}"})
        score_set = mock_worker_variant_insertion(
            client, session, data_provider, score_set, data_files / "scores.csv"
        )

        # Publishing enqueues background jobs; stub the queue so no worker is required.
        with patch.object(arq.ArqRedis, "enqueue_job", return_value=None):
            publish_score_set(client, score_set["urn"])

    response = client.post("/api/v1/score-sets/search", json={"limit": 2})
    assert response.status_code == 200
    body = response.json()
    assert len(body["scoreSets"]) == 2
    assert body["numScoreSets"] == total_published


def test_search_score_sets_not_affected_by_experiment_metadata(
    session, data_provider, client, setup_router_db, data_files
):
    """Experiments with multiple keywords should not reduce the number of score sets returned by search.

    This is a regression test for a bug where joinedload on one-to-many experiment relationships caused row
    multiplication in the main SQL query. The LIMIT clause was applied to the multiplied rows rather than unique
    score sets, resulting in fewer results than expected.
    """
    total_published = 3
    for idx in range(total_published):
        # Each experiment carries keyword metadata, which is what previously multiplied rows.
        experiment = create_experiment(client, {**TEST_EXPERIMENT_WITH_KEYWORD, "title": f"Experiment {idx}"})
        score_set = create_seq_score_set(client, experiment["urn"], update={"title": f"Score Set {idx}"})
        score_set = mock_worker_variant_insertion(
            client, session, data_provider, score_set, data_files / "scores.csv"
        )

        # Publishing enqueues background jobs; stub the queue so no worker is required.
        with patch.object(arq.ArqRedis, "enqueue_job", return_value=None):
            publish_score_set(client, score_set["urn"])

    response = client.post("/api/v1/score-sets/search", json={"limit": 2})
    assert response.status_code == 200
    body = response.json()
    assert len(body["scoreSets"]) == 2
    assert body["numScoreSets"] == total_published


########################################################################################################################
# Score set deletion
########################################################################################################################
Expand Down