Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions src/mavedb/lib/score_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,18 +238,26 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
score_sets: list[ScoreSet] = (
query.join(ScoreSet.experiment)
.options(
# Use selectinload for one-to-many experiment relationships to avoid row
# multiplication in the main query. joinedload would LEFT OUTER JOIN these
# into the main SQL query, and because they're nested inside contains_eager,
# SQLAlchemy's subquery-wrapping logic doesn't protect the LIMIT clause from
# being applied to multiplied rows rather than unique score sets. This would
# cause the count of returned score sets to be less than the requested limit,
# and the count query would be triggered even when the number of unique score
# sets in the main query results exceeds the limit.
contains_eager(ScoreSet.experiment).options(
joinedload(Experiment.experiment_set),
joinedload(Experiment.keyword_objs).joinedload(
selectinload(Experiment.keyword_objs).joinedload(
ExperimentControlledKeywordAssociation.controlled_keyword
),
joinedload(Experiment.created_by),
joinedload(Experiment.modified_by),
joinedload(Experiment.doi_identifiers),
joinedload(Experiment.publication_identifier_associations).joinedload(
selectinload(Experiment.doi_identifiers),
selectinload(Experiment.publication_identifier_associations).joinedload(
ExperimentPublicationIdentifierAssociation.publication
),
joinedload(Experiment.raw_read_identifiers),
selectinload(Experiment.raw_read_identifiers),
selectinload(Experiment.score_sets).options(
joinedload(ScoreSet.doi_identifiers),
joinedload(ScoreSet.publication_identifier_associations).joinedload(
Expand Down Expand Up @@ -292,7 +300,7 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
# query.
score_sets = score_sets[: search.limit]
count_query = db.query(ScoreSet)
build_search_score_sets_query_filter(db, count_query, owner_or_contributor, search)
count_query = build_search_score_sets_query_filter(db, count_query, owner_or_contributor, search)
num_score_sets = count_query.order_by(None).limit(None).count()

save_to_logging_context({"matching_resources": num_score_sets})
Expand Down
46 changes: 46 additions & 0 deletions tests/routers/test_score_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
TEST_BRNICH_SCORE_CALIBRATION_CLASS_BASED,
TEST_BRNICH_SCORE_CALIBRATION_RANGE_BASED,
TEST_CROSSREF_IDENTIFIER,
TEST_EXPERIMENT_WITH_KEYWORD,
TEST_GNOMAD_DATA_VERSION,
TEST_INACTIVE_LICENSE,
TEST_MAPPED_VARIANT_WITH_HGVS_G_EXPRESSION,
Expand Down Expand Up @@ -2403,6 +2404,51 @@ def test_search_filter_options_hidden_by_published_superseding_version(
assert target_name not in target_names


def test_search_score_sets_reports_correct_total_count_with_limit(
    session, data_provider, client, setup_router_db, data_files
):
    """numScoreSets must report the true total of matching score sets, not the page size.

    Publishes more score sets than the requested search limit, then verifies that the
    search endpoint returns exactly `limit` results while still counting every match.
    """
    total_published = 3
    for idx in range(total_published):
        experiment = create_experiment(client, {"title": f"Experiment {idx}"})
        score_set = create_seq_score_set(client, experiment["urn"], update={"title": f"Score Set {idx}"})
        score_set = mock_worker_variant_insertion(
            client, session, data_provider, score_set, data_files / "scores.csv"
        )

        # Publishing enqueues background jobs; stub the queue so no worker is required.
        with patch.object(arq.ArqRedis, "enqueue_job", return_value=None):
            publish_score_set(client, score_set["urn"])

    response = client.post("/api/v1/score-sets/search", json={"limit": 2})
    assert response.status_code == 200
    body = response.json()
    assert len(body["scoreSets"]) == 2
    assert body["numScoreSets"] == total_published


def test_search_score_sets_not_affected_by_experiment_metadata(
    session, data_provider, client, setup_router_db, data_files
):
    """Experiments with multiple keywords should not reduce the number of score sets returned by search.

    This is a regression test for a bug where joinedload on one-to-many experiment relationships caused row
    multiplication in the main SQL query. The LIMIT clause was applied to the multiplied rows rather than unique
    score sets, resulting in fewer results than expected.
    """
    total_published = 3
    for idx in range(total_published):
        # Each experiment carries keyword metadata, which is what previously multiplied rows.
        experiment = create_experiment(client, {**TEST_EXPERIMENT_WITH_KEYWORD, "title": f"Experiment {idx}"})
        score_set = create_seq_score_set(client, experiment["urn"], update={"title": f"Score Set {idx}"})
        score_set = mock_worker_variant_insertion(
            client, session, data_provider, score_set, data_files / "scores.csv"
        )

        # Publishing enqueues background jobs; stub the queue so no worker is required.
        with patch.object(arq.ArqRedis, "enqueue_job", return_value=None):
            publish_score_set(client, score_set["urn"])

    response = client.post("/api/v1/score-sets/search", json={"limit": 2})
    assert response.status_code == 200
    body = response.json()
    assert len(body["scoreSets"]) == 2
    assert body["numScoreSets"] == total_published


########################################################################################################################
# Score set deletion
########################################################################################################################
Expand Down