loculus-project · anna-parker · May 12, 2026 · May 18, 2026 · chatgpt-codex-connector · May 18, 2026
diff --git a/kubernetes/loculus/values.schema.json b/kubernetes/loculus/values.schema.json
@@ -686,7 +686,7 @@
                     "docsIncludePrefix": false,
                     "type": "string",
                     "enum": ["ALL", "ANY"],
-                    "description": "If multi-segmented viruses should require ALL segments (that the user provides) align or ANY segment aligns"
+                    "description": "If multi-segmented viruses should require ALL segments (that the user provides) align or ANY segment aligns (in case ANY it must still be possible to assign the segments)"
                   },
                   "segment_classification_method": {
                     "groups": ["nextcladePipelineConfigFile"],

diff --git a/preprocessing/nextclade/src/loculus_preprocessing/config.py b/preprocessing/nextclade/src/loculus_preprocessing/config.py
@@ -44,8 +44,8 @@ class ProcessingSpec(BaseModel):
 
 class AlignmentRequirement(StrEnum):
     # Determines whether ALL or ANY segments that a user provides must align.
-    # ANY: warn if some segments fail and some segments align
-    # ALL: error if any segment fails even if some segments align
+    # ANY: warn if some segments can be assigned and not aligned while other segments align
+    # ALL: error if any segment fails alignment even if some segments align
     # NONE: do not align any segments, just process them as-is
     # - set if no nextclade dataset is provided
     ANY = "ANY"

diff --git a/preprocessing/nextclade/src/loculus_preprocessing/nextclade.py b/preprocessing/nextclade/src/loculus_preprocessing/nextclade.py
@@ -17,7 +17,7 @@
 
 from loculus_preprocessing.sequence_checks import error_on_excess_sequences
 
-from .config import AlignmentRequirement, Config, NextcladeSequenceAndDataset, SequenceName
+from .config import Config, NextcladeSequenceAndDataset, SequenceName
 from .datatypes import (
     AccessionVersion,
     Alert,
@@ -376,7 +376,7 @@ def is_valid_dataset_match(method, best_dataset_id, dataset):
     return best_dataset_id in accepted__dataset_matches_or_default(dataset)
 
 
-def assign_segment(  # noqa: C901
+def assign_segment(
     entry: UnprocessedEntry,
     id_map: dict[tuple[AccessionVersion, FastaId], str],
     best_hits: pd.DataFrame,
@@ -385,33 +385,24 @@ def assign_segment(  # noqa: C901
     """
     Assign sequences to segments based on best hits from nextclade align, nextclade sort or diamond
     If a segment has multiple references assign to the reference with highest alignment score
-    1. If no best hit for a sequence, add error/warning about unaligned sequence
-    2. If best hit does not match any accepted reference, add error/warning about unaligned sequence
+    1. If no best hit for a sequence, add error about unaligned sequence
+    2. If best hit does not match any accepted reference, add error about unaligned sequence
     3. If multiple sequences match the same segment (also if they match different references of
     that segment), add error about duplicate segments
-    4. If no sequences assigned and no errors about unaligned/duplicate, add error about
-       no sequence data found (e.g. when alignment requirement is ANY and all sequences miss)
     """
     sort_results_map: dict[SegmentName, list[AssignedSequence]] = defaultdict(list)
     sequence_assignment = SequenceAssignment()
 
-    has_unaligned_sequence = False
-    has_duplicate_segments = False
-
     for fasta_id in entry.data.unalignedNucleotideSequences:
         seq_id = id_map[entry.accessionVersion, fasta_id]
         if seq_id not in best_hits[SequenceIdentifier].unique():
-            has_unaligned_sequence = True
             method = config.segment_classification_method.display_name
             annotation = sequence_annotation(
                 f"Sequence with fasta id {fasta_id} does not match any reference for "
                 f"organism: {config.organism} per `{method}`. "
                 "Double check you are submitting to the correct organism."
             )
-            if config.alignment_requirement == AlignmentRequirement.ALL:
-                sequence_assignment.alert.errors.append(annotation)
-            else:
-                sequence_assignment.alert.warnings.append(annotation)
+            sequence_assignment.alert.errors.append(annotation)
             continue
 
         best_hit = best_hits[best_hits[SequenceIdentifier] == seq_id]
@@ -429,21 +420,16 @@ def assign_segment(  # noqa: C901
                 break
 
         if not_found:
-            has_unaligned_sequence = True
             annotation = sequence_annotation(
                 f"Sequence {fasta_id} best matches {best_dataset_id}, "
                 "which is currently not an accepted option for organism: "
                 f"{config.organism}. It is therefore not possible to release. "
                 "Contact the administrator if you think this message is an error."
             )
-            if config.alignment_requirement == AlignmentRequirement.ALL:
-                sequence_assignment.alert.errors.append(annotation)
-            else:
-                sequence_assignment.alert.warnings.append(annotation)
+            sequence_assignment.alert.errors.append(annotation)
 
     for segment, ids in sort_results_map.items():
         if len(ids) > 1:
-            has_duplicate_segments = True
             sequence_assignment.alert.errors.append(
                 sequence_annotation(
                     f"Multiple sequences (with fasta ids: {', '.join([id.fasta_id for id in ids])})"
@@ -457,18 +443,6 @@ def assign_segment(  # noqa: C901
             entry.data.unalignedNucleotideSequences[ids[0].fasta_id]
         )
 
-    if (
-        len(sequence_assignment.unalignedNucleotideSequences) == 0
-        and not has_duplicate_segments
-        and (not has_unaligned_sequence or config.alignment_requirement == AlignmentRequirement.ANY)
-    ):
-        sequence_assignment.alert.errors.append(
-            sequence_annotation(
-                "No sequence data could be classified - "
-                "check you are submitting to the correct organism.",
-            )
-        )
-
     return sequence_assignment
 
 

diff --git a/preprocessing/nextclade/tests/test_nextclade_preprocessing.py b/preprocessing/nextclade/tests/test_nextclade_preprocessing.py
@@ -123,6 +123,11 @@ def sequence_with_mutation(
     return str(seq[: pos - 1]) + "A" + str(seq[pos:])
 
 
+def fragment(type: Literal["single"] | Literal["ebola-sudan"] | Literal["ebola-zaire"]) -> str:
+    seq = consensus_sequence(type)
+    return str(seq[:90])
+
+
 def ebola_sudan_aa(nuc: str, gene: Literal["VP35", "NP"]) -> str:
     match gene:
         case "NP":
@@ -761,14 +766,6 @@ def invalid_sequence() -> str:
             "length_ebola-zaire": 0,
         },
         expected_errors=build_processing_annotations(
-            [
-                ProcessingAnnotationHelper.sequence_annotation_helper(
-                    "No sequence data could be classified - "
-                    "check you are submitting to the correct organism.",
-                )
-            ]
-        ),
-        expected_warnings=build_processing_annotations(
             [
                 ProcessingAnnotationHelper.sequence_annotation_helper(
                     "Sequence with fasta id fastaHeader1 does not match any reference "
@@ -777,6 +774,7 @@ def invalid_sequence() -> str:
                 )
             ]
         ),
+        expected_warnings=[],
         expected_processed_alignment=ProcessedAlignment(
             unalignedNucleotideSequences={},
             alignedNucleotideSequences={},
@@ -787,18 +785,18 @@ def invalid_sequence() -> str:
         ),
     ),
     Case(
-        name="with one failed alignment, one succeeded",
+        name="with one failed alignment (but classified), one succeeded",
         input_metadata={},
         input_sequence={
-            "fastaHeader1": invalid_sequence(),
+            "fastaHeader1": fragment("ebola-sudan"),
             "fastaHeader2": sequence_with_mutation("ebola-zaire"),
         },
         accession_id="1",
         expected_metadata={
             "totalInsertedNucs_ebola-sudan": None,
             "totalSnps_ebola-sudan": None,
             "totalDeletedNucs_ebola-sudan": None,
-            "length_ebola-sudan": 0,
+            "length_ebola-sudan": len(fragment("ebola-sudan")),
             "totalInsertedNucs_ebola-zaire": 0,
             "totalSnps_ebola-zaire": 1,
             "totalDeletedNucs_ebola-zaire": 0,
@@ -807,16 +805,18 @@ def invalid_sequence() -> str:
         expected_errors=[],
         expected_warnings=build_processing_annotations(
             [
-                ProcessingAnnotationHelper.sequence_annotation_helper(
-                    "Sequence with fasta id fastaHeader1 does not match any reference"
-                    " for organism: multi-ebola-test per `nextclade sort`. "
-                    "Double check you are submitting to the correct organism.",
-                )
+                ProcessingAnnotationHelper(
+                    ["ebola-sudan"],
+                    ["ebola-sudan"],
+                    "Nucleotide sequence for ebola-sudan failed to align",
+                    AnnotationSourceType.NUCLEOTIDE_SEQUENCE,
+                ),
             ]
         ),
         expected_processed_alignment=ProcessedAlignment(
             unalignedNucleotideSequences={
                 "ebola-zaire": sequence_with_mutation("ebola-zaire"),
+                "ebola-sudan": fragment("ebola-sudan"),
             },
             alignedNucleotideSequences={
                 "ebola-zaire": sequence_with_mutation("ebola-zaire"),
@@ -827,7 +827,7 @@ def invalid_sequence() -> str:
                 "LEbolaZaire": ebola_zaire_aa(sequence_with_mutation("ebola-zaire"), "L"),
             },
             aminoAcidInsertions={},
-            sequenceNameToFastaId={"ebola-zaire": "fastaHeader2"},
+            sequenceNameToFastaId={"ebola-zaire": "fastaHeader2", "ebola-sudan": "fastaHeader1"},
         ),
     ),
 ]
@@ -849,14 +849,6 @@ def invalid_sequence() -> str:
             "length_ebola-zaire": 0,
         },
         expected_errors=build_processing_annotations(
-            [
-                ProcessingAnnotationHelper.sequence_annotation_helper(
-                    "No sequence data could be classified - "
-                    "check you are submitting to the correct organism.",
-                )
-            ]
-        ),
-        expected_warnings=build_processing_annotations(
             [
                 ProcessingAnnotationHelper.sequence_annotation_helper(
                     "Sequence with fasta id fastaHeader1 does not match any reference for "
@@ -865,6 +857,7 @@ def invalid_sequence() -> str:
                 )
             ]
         ),
+        expected_warnings=[],
         expected_processed_alignment=ProcessedAlignment(
             unalignedNucleotideSequences={},
             alignedNucleotideSequences={},
@@ -892,8 +885,8 @@ def invalid_sequence() -> str:
             "totalDeletedNucs_ebola-zaire": 0,
             "length_ebola-zaire": len(consensus_sequence("ebola-zaire")),
         },
-        expected_errors=[],
-        expected_warnings=build_processing_annotations(
+        expected_warnings=[],
+        expected_errors=build_processing_annotations(
             [
                 ProcessingAnnotationHelper.sequence_annotation_helper(
                     "Sequence with fasta id fastaHeader1 does not match any reference for "