From a8ea1d2577e2efff8c6f299934d7185144a3b3b7 Mon Sep 17 00:00:00 2001 From: hansenp <10495485+hansenp@users.noreply.github.com> Date: Tue, 25 Nov 2025 16:56:33 +0100 Subject: [PATCH 1/5] Implemented rescaling of unnormalized log BOQA scores. --- .../core/prioritisers/BoqaPrioritiser.java | 41 +++++++++++++++++-- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/prioritisers/BoqaPrioritiser.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/prioritisers/BoqaPrioritiser.java index b3699df79..ad400cdd9 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/prioritisers/BoqaPrioritiser.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/prioritisers/BoqaPrioritiser.java @@ -7,6 +7,7 @@ import org.p2gx.boqa.core.Counter; import org.p2gx.boqa.core.DiseaseData; import org.p2gx.boqa.core.PatientData; +import org.p2gx.boqa.core.algorithm.AlgorithmParameters; import org.p2gx.boqa.core.analysis.BoqaAnalysisResult; import org.p2gx.boqa.core.analysis.BoqaPatientAnalyzer; import org.p2gx.boqa.core.analysis.BoqaResult; @@ -46,8 +47,11 @@ public Stream prioritise(List hpoIds, List gen logger.info("Running BOQA prioritiser..."); var observedHpoIds = hpoIds.stream().map(TermId::of).collect(toUnmodifiableSet()); PatientData patientData = new ExomiserPatientData(observedHpoIds, Collections.emptySet()); - BoqaAnalysisResult boqaAnalysisResult = BoqaPatientAnalyzer.computeBoqaResults(patientData, counter, Integer.MAX_VALUE); - List rescaledBoqaResults = softMaxScaledBoqaResultScores(boqaAnalysisResult.boqaResults()); + double alpha = 1.0/19077; + double beta = 0.9; + AlgorithmParameters params = AlgorithmParameters.create(alpha, beta); + BoqaAnalysisResult boqaAnalysisResult = BoqaPatientAnalyzer.computeBoqaExomiserResults(patientData, counter, params); + List rescaledBoqaResults = reScaledRawLogExomiserScores(boqaAnalysisResult.boqaResults()); 
logger.debug("Top 10 BOQA results:"); rescaledBoqaResults.stream().sorted(Comparator.comparing(BoqaResult::boqaScore)).limit(10).forEach(b -> logger.debug("BOQA score: {} {} {}", b.counts().diseaseId(), b.boqaScore(), b.counts().diseaseLabel())); Map boqaResultsByDiseaseId = rescaledBoqaResults.stream() @@ -55,6 +59,37 @@ public Stream prioritise(List hpoIds, List gen return genes.stream().map(prioritiseGene(boqaResultsByDiseaseId)); } + /** + * Takes a list of BOQA results and transforms the raw BOQA log scores as follows: + * + * boqaExomiserScore_i = (boqaRawLogScore_i + abs(min(boqaRawLogScore))) / (max(boqaRawLogScore) + abs(min(boqaRawLogScore))) + * + * @param boqaResults + * @return reScaledBoqaResults + */ + private static List reScaledRawLogExomiserScores(List boqaResults) { + + int numBoqaResults = boqaResults.size(); + List rankedBoqaResults = boqaResults.stream().sorted(Comparator.comparing(BoqaResult::boqaScore)).toList(); + List rescaledBoqaResults = new ArrayList<>(numBoqaResults); + + List rawLogBoqaScores = new ArrayList<>(numBoqaResults); + for (int i = 0; i < numBoqaResults; i++) { + BoqaResult boqaResult = rankedBoqaResults.get(i); + double rawLogBoqaScore = boqaResult.boqaScore(); + rawLogBoqaScores.add(rawLogBoqaScore); + } + double x = Math.abs(Collections.min(rawLogBoqaScores)); + double y = Collections.max(rawLogBoqaScores) + x; + for (int i = 0; i < numBoqaResults; i++) { + BoqaResult boqaResult = rankedBoqaResults.get(i); + double rawLogBoqaScore = rawLogBoqaScores.get(i); + double boqaExomiserScore = (rawLogBoqaScore + x) / y; + rescaledBoqaResults.add(new BoqaResult(boqaResult.counts(), boqaExomiserScore)); + } + return rescaledBoqaResults; + } + /** * Rescales the BOQA result scores to adjust their range. 
The method normalizes the scores so that * the highest scoring result is scaled to 1.0, while preserving the relative differences between @@ -95,7 +130,7 @@ private static List softMaxScaledBoqaResultScores(List b // return (Math.exp(input) / total) == (Math.expt(score) * ( 1 / total)) return boqaResults.stream() - .map(boqaResult -> new BoqaResult(boqaResult.counts(), Math.exp(boqaResult.boqaScore() - maxScore) * scaleFactor)) + .map(boqaResult -> new BoqaResult(boqaResult.counts(), 0.5)) .toList(); } From 0c8de5baf9688267a32409b2c71eebb840129ed7 Mon Sep 17 00:00:00 2001 From: hansenp <10495485+hansenp@users.noreply.github.com> Date: Tue, 25 Nov 2025 18:54:07 +0100 Subject: [PATCH 2/5] Cleaned up and simplified (#614). --- .../core/prioritisers/BoqaPrioritiser.java | 64 ++++++++++--------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/prioritisers/BoqaPrioritiser.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/prioritisers/BoqaPrioritiser.java index ad400cdd9..ece7e16c5 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/prioritisers/BoqaPrioritiser.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/prioritisers/BoqaPrioritiser.java @@ -14,7 +14,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import java.util.*; import java.util.function.Function; import java.util.stream.Stream; @@ -27,6 +26,8 @@ public class BoqaPrioritiser implements Prioritiser { private final PriorityService priorityService; private final Counter counter; + private final double alpha; + private final double beta; public BoqaPrioritiser(PriorityService priorityService, Counter counter) { // TODO: add getCounter(): Counter to Priority Service, then initialise the Counter @Lazy in the exomiser-config @@ -35,6 +36,8 @@ public BoqaPrioritiser(PriorityService priorityService, Counter counter) { // it. 
The Counter now takes ~ 300ms to create, but still, it would be best to move it's creation into the config code. this.priorityService = priorityService; this.counter = counter; + this.alpha = 1.0/19077; // TODO: Make alpha and beta constructor parameters + this.beta = 0.9; } @Override @@ -47,11 +50,9 @@ public Stream prioritise(List hpoIds, List gen logger.info("Running BOQA prioritiser..."); var observedHpoIds = hpoIds.stream().map(TermId::of).collect(toUnmodifiableSet()); PatientData patientData = new ExomiserPatientData(observedHpoIds, Collections.emptySet()); - double alpha = 1.0/19077; - double beta = 0.9; AlgorithmParameters params = AlgorithmParameters.create(alpha, beta); BoqaAnalysisResult boqaAnalysisResult = BoqaPatientAnalyzer.computeBoqaExomiserResults(patientData, counter, params); - List rescaledBoqaResults = reScaledRawLogExomiserScores(boqaAnalysisResult.boqaResults()); + List rescaledBoqaResults = reScaledRawLogBoqaExomiserScores(boqaAnalysisResult.boqaResults()); logger.debug("Top 10 BOQA results:"); rescaledBoqaResults.stream().sorted(Comparator.comparing(BoqaResult::boqaScore)).limit(10).forEach(b -> logger.debug("BOQA score: {} {} {}", b.counts().diseaseId(), b.boqaScore(), b.counts().diseaseLabel())); Map boqaResultsByDiseaseId = rescaledBoqaResults.stream() @@ -59,35 +60,41 @@ public Stream prioritise(List hpoIds, List gen return genes.stream().map(prioritiseGene(boqaResultsByDiseaseId)); } + /** - * Takes a list of BOQA results and transforms the raw BOQA log scores as follows: + * Transforms a list of BOQA results by rescaling their raw log scores into the range [0, 1]. * - * boqaExomiserScore_i = (boqaRawLogScore_i + abs(min(boqaRawLogScore))) / (max(boqaRawLogScore) + abs(min(boqaRawLogScore))) + *

<p>The transformation is done as follows:</p>

+ * <pre>
+     * boqaExomiserScore_i =
+     *     (boqaRawLogScore_i + abs(min(boqaRawLogScore)))
+     *     / (max(boqaRawLogScore) + abs(min(boqaRawLogScore)))
+     * </pre>
* - * @param boqaResults - * @return reScaledBoqaResults + *

<p>This ensures that the minimum raw score maps to 0, and the maximum maps to 1.</p>

+ * + * @param boqaResults the list of BOQA results to rescale + * @return a list of BOQA results with rescaled scores */ - private static List reScaledRawLogExomiserScores(List boqaResults) { + private static List reScaledRawLogBoqaExomiserScores(List boqaResults) { - int numBoqaResults = boqaResults.size(); - List rankedBoqaResults = boqaResults.stream().sorted(Comparator.comparing(BoqaResult::boqaScore)).toList(); - List rescaledBoqaResults = new ArrayList<>(numBoqaResults); + // Extract raw BOQA log scores + List rawLogBoqaScores = + boqaResults.stream() + .map(BoqaResult::boqaScore) + .toList(); - List rawLogBoqaScores = new ArrayList<>(numBoqaResults); - for (int i = 0; i < numBoqaResults; i++) { - BoqaResult boqaResult = rankedBoqaResults.get(i); - double rawLogBoqaScore = boqaResult.boqaScore(); - rawLogBoqaScores.add(rawLogBoqaScore); - } - double x = Math.abs(Collections.min(rawLogBoqaScores)); - double y = Collections.max(rawLogBoqaScores) + x; - for (int i = 0; i < numBoqaResults; i++) { - BoqaResult boqaResult = rankedBoqaResults.get(i); - double rawLogBoqaScore = rawLogBoqaScores.get(i); - double boqaExomiserScore = (rawLogBoqaScore + x) / y; - rescaledBoqaResults.add(new BoqaResult(boqaResult.counts(), boqaExomiserScore)); - } - return rescaledBoqaResults; + // Compute offset and normalization factor + double offset = Math.abs(Collections.min(rawLogBoqaScores)); + double scale = Collections.max(rawLogBoqaScores) + offset; + + // Rescale + return boqaResults.stream() + .map(br -> { + double boqaExomiserScore = (br.boqaScore() + offset) / scale; + return new BoqaResult(br.counts(), boqaExomiserScore); + }) + .toList(); } /** @@ -135,6 +142,7 @@ private static List softMaxScaledBoqaResultScores(List b } // 1 - ( rank / numTotalDiseases) // rank-scaled score + // TODO: Produces slightly different phenotype scores when run multiple times in a row. 
private static List rankScaledBoqaResultScores(List boqaResults) { int numBoqaResults = boqaResults.size(); List rankedBoqaResults = boqaResults.stream().sorted(Comparator.comparing(BoqaResult::boqaScore)).toList(); @@ -146,7 +154,6 @@ private static List rankScaledBoqaResultScores(List boqa return rankScaledResults; } - /** * If the gene is not contained in the database, we return an empty * but initialized RelevanceScore object. Otherwise, we retrieve a list of @@ -259,5 +266,4 @@ public Map getIdToLabel() { return diseaseIdToLabel; } } - } From b5bc3b92af89bff9e7f3dce6015822a4e6700922 Mon Sep 17 00:00:00 2001 From: hansenp <10495485+hansenp@users.noreply.github.com> Date: Thu, 27 Nov 2025 17:28:47 +0100 Subject: [PATCH 3/5] Switched DiseaseData from DiseaseDataParser to DiseaseDataPhenolIngest (#614). --- .../PrioritiserAutoConfiguration.java | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/exomiser-spring-boot-autoconfigure/src/main/java/org/monarchinitiative/exomiser/autoconfigure/phenotype/PrioritiserAutoConfiguration.java b/exomiser-spring-boot-autoconfigure/src/main/java/org/monarchinitiative/exomiser/autoconfigure/phenotype/PrioritiserAutoConfiguration.java index 6b7f620bd..a95d7fdc8 100644 --- a/exomiser-spring-boot-autoconfigure/src/main/java/org/monarchinitiative/exomiser/autoconfigure/phenotype/PrioritiserAutoConfiguration.java +++ b/exomiser-spring-boot-autoconfigure/src/main/java/org/monarchinitiative/exomiser/autoconfigure/phenotype/PrioritiserAutoConfiguration.java @@ -25,12 +25,17 @@ import org.monarchinitiative.exomiser.core.prioritisers.PriorityFactory; import org.monarchinitiative.exomiser.core.prioritisers.util.DataMatrix; import org.monarchinitiative.exomiser.core.prioritisers.util.DataMatrixIO; +import org.monarchinitiative.phenol.annotations.formats.hpo.HpoDiseases; +import org.monarchinitiative.phenol.annotations.io.hpo.DiseaseDatabase; +import 
org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoader; +import org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoaderOptions; +import org.monarchinitiative.phenol.annotations.io.hpo.HpoDiseaseLoaders; import org.monarchinitiative.phenol.io.OntologyLoader; import org.monarchinitiative.phenol.ontology.data.Ontology; import org.p2gx.boqa.core.Counter; import org.p2gx.boqa.core.DiseaseData; import org.p2gx.boqa.core.algorithm.BoqaSetCounter; -import org.p2gx.boqa.core.diseases.DiseaseDataParser; +import org.p2gx.boqa.core.diseases.DiseaseDataPhenolIngest; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.boot.autoconfigure.condition.ConditionalOnClass; @@ -45,6 +50,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.*; +import java.util.stream.Collectors; /** * @author Jules Jacobsen @@ -166,9 +172,16 @@ Counter boqaCounter(Ontology hpoOntology) { // Parse disease-HPO associations into DiseaseData object Path hpoaFilePath = phenotypeDataDirectory().resolve("phenotype.hpoa"); logger.debug("Importing disease phenotype associations from file: {} ...", hpoaFilePath); - DiseaseData diseaseData = null; + DiseaseData diseaseData; try { - diseaseData = DiseaseDataParser.parseDiseaseDataFromHpoa(hpoaFilePath); + //diseaseData = DiseaseDataParser.parseDiseaseDataFromHpoa(hpoaFilePath); + Set diseaseDatabase = Set.of("OMIM").stream() + .map(DiseaseDatabase::fromString) + .collect(Collectors.toSet()); + HpoDiseaseLoaderOptions options = HpoDiseaseLoaderOptions.of(diseaseDatabase,false, 100); + HpoDiseaseLoader loader = HpoDiseaseLoaders.defaultLoader(hpoOntology(), options); + HpoDiseases diseases = loader.load(hpoaFilePath); + diseaseData = DiseaseDataPhenolIngest.of(hpoOntology(), diseases); } catch (IOException e) { throw new IllegalStateException(e); } From b5b65e0061c79d6c01a347b8eda053b936326c33 Mon Sep 17 00:00:00 2001 From: hansenp <10495485+hansenp@users.noreply.github.com> Date: Tue, 2 Dec 
2025 12:47:34 +0100 Subject: [PATCH 4/5] Adjusted BOQA method name to computeBoqaResultsRawLog (#614). --- .../exomiser/core/prioritisers/BoqaPrioritiser.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/prioritisers/BoqaPrioritiser.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/prioritisers/BoqaPrioritiser.java index ece7e16c5..d3269e506 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/prioritisers/BoqaPrioritiser.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/prioritisers/BoqaPrioritiser.java @@ -47,11 +47,11 @@ public PriorityType priorityType() { @Override public Stream prioritise(List hpoIds, List genes) { - logger.info("Running BOQA prioritiser..."); + logger.info("Running BOQA prioritiser...XXX"); var observedHpoIds = hpoIds.stream().map(TermId::of).collect(toUnmodifiableSet()); PatientData patientData = new ExomiserPatientData(observedHpoIds, Collections.emptySet()); AlgorithmParameters params = AlgorithmParameters.create(alpha, beta); - BoqaAnalysisResult boqaAnalysisResult = BoqaPatientAnalyzer.computeBoqaExomiserResults(patientData, counter, params); + BoqaAnalysisResult boqaAnalysisResult = BoqaPatientAnalyzer.computeBoqaResultsRawLog(patientData, counter, params); List rescaledBoqaResults = reScaledRawLogBoqaExomiserScores(boqaAnalysisResult.boqaResults()); logger.debug("Top 10 BOQA results:"); rescaledBoqaResults.stream().sorted(Comparator.comparing(BoqaResult::boqaScore)).limit(10).forEach(b -> logger.debug("BOQA score: {} {} {}", b.counts().diseaseId(), b.boqaScore(), b.counts().diseaseLabel())); From f1c7f41ea488a0c24f340c82a0edba253c7daba6 Mon Sep 17 00:00:00 2001 From: hansenp <10495485+hansenp@users.noreply.github.com> Date: Fri, 13 Feb 2026 09:19:27 +0100 Subject: [PATCH 5/5] Cleaning up (#614). 
--- .../core/prioritisers/BoqaPrioritiser.java | 59 +------------------ 1 file changed, 1 insertion(+), 58 deletions(-) diff --git a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/prioritisers/BoqaPrioritiser.java b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/prioritisers/BoqaPrioritiser.java index d3269e506..88eff2545 100644 --- a/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/prioritisers/BoqaPrioritiser.java +++ b/exomiser-core/src/main/java/org/monarchinitiative/exomiser/core/prioritisers/BoqaPrioritiser.java @@ -47,7 +47,7 @@ public PriorityType priorityType() { @Override public Stream prioritise(List hpoIds, List genes) { - logger.info("Running BOQA prioritiser...XXX"); + logger.info("Running BOQA prioritiser..."); var observedHpoIds = hpoIds.stream().map(TermId::of).collect(toUnmodifiableSet()); PatientData patientData = new ExomiserPatientData(observedHpoIds, Collections.emptySet()); AlgorithmParameters params = AlgorithmParameters.create(alpha, beta); @@ -97,63 +97,6 @@ private static List reScaledRawLogBoqaExomiserScores(List minMaxScaledBoqaResultScores(List boqaResults) { - // BoqaResult scores are normalised so that they sum to 1 across all results. This leads to tiny, tiny scores. - // This uses min-max scaling, which doesn't perform well with outliers. It seems like BOQA produces data with - // extreme outliers, so this needs to use a different scaling method. 
- // Min-max scaled score: - // scaled_score = (score - min_score) / (max_score - min_score) - // Equivalent to: - // scale_factor = 1 / (max_score - min_score) - // scaled_score = (score - min_score) * scale_factor - double minScore = boqaResults.stream().mapToDouble(BoqaResult::boqaScore).min().orElse(0d); - double maxScore = boqaResults.stream().mapToDouble(BoqaResult::boqaScore).max().orElse(0d); - double scaleFactor = 1 / (maxScore - minScore); - return boqaResults.stream() - .map(boqaResult -> new BoqaResult(boqaResult.counts(), (boqaResult.boqaScore() - minScore) * scaleFactor)) - .toList(); - } - - // TODO: use softmax? - private static List softMaxScaledBoqaResultScores(List boqaResults) { - // Apply temperature scaling first - double maxScore = boqaResults.stream() - .mapToDouble(BoqaResult::boqaScore) - .max() - .orElse(0.0); - - double total = boqaResults.stream() - .mapToDouble(b -> Math.exp(b.boqaScore() - maxScore)) - .sum(); - double scaleFactor = 1 / total; - -// return (Math.exp(input) / total) == (Math.expt(score) * ( 1 / total)) - return boqaResults.stream() - .map(boqaResult -> new BoqaResult(boqaResult.counts(), 0.5)) - .toList(); - } - - // 1 - ( rank / numTotalDiseases) // rank-scaled score - // TODO: Produces slightly different phenotype scores when run multiple times in a row. - private static List rankScaledBoqaResultScores(List boqaResults) { - int numBoqaResults = boqaResults.size(); - List rankedBoqaResults = boqaResults.stream().sorted(Comparator.comparing(BoqaResult::boqaScore)).toList(); - List rankScaledResults = new ArrayList<>(numBoqaResults); - for (int i = 0; i < numBoqaResults; i++) { - BoqaResult boqaResult = rankedBoqaResults.get(i); - rankScaledResults.add(new BoqaResult(boqaResult.counts(), 1.0 - (i / (double) numBoqaResults))); - } - return rankScaledResults; - } - /** * If the gene is not contained in the database, we return an empty * but initialized RelevanceScore object. Otherwise, we retrieve a list of