diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/IndexCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/IndexCommandExecutor.java index 14868749cd..7f4b9707ff 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/IndexCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/IndexCommandExecutor.java @@ -59,11 +59,11 @@ public void execute() { Path indexFile = Paths.get(this.appHome).resolve("conf").resolve("mongodb-indexes.json"); logger.info("Using index configuration file: {}", indexFile.toAbsolutePath()); - IndexManager indexManager = new IndexManager(indexCommandOptions.database, indexFile, configuration); + IndexManager indexManager = new IndexManager(indexCommandOptions.database, indexFile, dataReleaseManager, configuration); if (indexCommandOptions.validate) { indexManager.validateMongoDBIndexes(indexCommandOptions.data); } else { - indexManager.createMongoDBIndexes(indexCommandOptions.data, indexCommandOptions.dataRelease, + indexManager.createMongoDBIndexes(indexCommandOptions.data, Integer.parseInt(indexCommandOptions.dataRelease), indexCommandOptions.dropIndexesFirst); } diff --git a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java index 5f03ddc96e..d36af41113 100644 --- a/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java +++ b/cellbase-app/src/main/java/org/opencb/cellbase/app/cli/admin/executors/LoadCommandExecutor.java @@ -110,7 +110,7 @@ public void execute() throws CellBaseException { loadRunner = new LoadRunner(loader, database, numThreads, dataReleaseManager, configuration); if (createIndexes) { Path indexFile = Paths.get(this.appHome).resolve("conf").resolve("mongodb-indexes.json"); - indexManager = new IndexManager(database, indexFile, configuration); + indexManager = new IndexManager(database, indexFile, dataReleaseManager, configuration); } for (String data : dataList) { diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/indexer/IndexManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/indexer/IndexManager.java index de187b00a5..c2be6beaf3 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/indexer/IndexManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/indexer/IndexManager.java @@ -19,9 +19,15 @@ import com.fasterxml.jackson.databind.DeserializationFeature; import com.fasterxml.jackson.databind.MapperFeature; import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.opencb.cellbase.core.config.CellBaseConfiguration; +import org.opencb.cellbase.core.exception.CellBaseException; +import org.opencb.cellbase.core.models.Release; +import org.opencb.cellbase.lib.EtlCommons; import org.opencb.cellbase.lib.db.MongoDBManager; +import org.opencb.cellbase.lib.impl.core.CellBaseDBAdaptor; +import org.opencb.cellbase.lib.managers.DataReleaseManager; import org.opencb.commons.datastore.core.ObjectMap; import org.opencb.commons.datastore.mongodb.MongoDBIndexUtils; import org.opencb.commons.datastore.mongodb.MongoDataStore; @@ -35,10 +41,12 @@ import java.util.*; import static org.opencb.cellbase.lib.impl.core.CellBaseDBAdaptor.DATA_RELEASE_SEPARATOR; +import static org.opencb.cellbase.lib.impl.core.CellBaseDBAdaptor.buildCollectionName; public class IndexManager { + private DataReleaseManager dataReleaseManager; private CellBaseConfiguration configuration; private Logger logger; private String databaseName; @@ -48,31 +56,10 @@ public class IndexManager { private Map>> indexes; - private static final Map> DATA_COLLECTIONS = new HashMap<>(); - - static { - DATA_COLLECTIONS.put("genome", Arrays.asList("genome_info", "genome_sequence")); - DATA_COLLECTIONS.put("conservation", Collections.singletonList("conservation")); - DATA_COLLECTIONS.put("repeats", Collections.singletonList("repeats")); - DATA_COLLECTIONS.put("gene", Arrays.asList("gene", "refseq")); - DATA_COLLECTIONS.put("protein", Collections.singletonList("protein")); - DATA_COLLECTIONS.put("regulation", Arrays.asList("regulatory_region", "regulatory_pfm")); - DATA_COLLECTIONS.put("variation", Collections.singletonList("variation")); - DATA_COLLECTIONS.put("variation_functional_score", Collections.singletonList("missense_variation_functional_score")); - DATA_COLLECTIONS.put("protein_functional_prediction", Collections.singletonList("protein_functional_prediction")); - DATA_COLLECTIONS.put("revel", Collections.singletonList("revel")); - DATA_COLLECTIONS.put("alphamissense", Collections.singletonList("alphamissense")); - DATA_COLLECTIONS.put("clinical_variants", Collections.singletonList("clinical_variants")); - DATA_COLLECTIONS.put("splice_score", Collections.singletonList("splice_score")); - DATA_COLLECTIONS.put("ontology", Collections.singletonList("ontology")); - DATA_COLLECTIONS.put("pubmed", Collections.singletonList("pubmed")); - DATA_COLLECTIONS.put("pharmacogenomics", Collections.singletonList("pharmacogenomics")); - DATA_COLLECTIONS.put("polygenic_score", Arrays.asList("variant_polygenic_score", "common_polygenic_score")); - } - - public IndexManager(String databaseName, Path indexFile, CellBaseConfiguration configuration) { + public IndexManager(String databaseName, Path indexFile, DataReleaseManager dataReleaseManager, CellBaseConfiguration configuration) { this.databaseName = databaseName; this.indexFile = indexFile; + this.dataReleaseManager = dataReleaseManager; this.configuration = configuration; init(); @@ -82,8 +69,6 @@ private void init() { logger = LoggerFactory.getLogger(this.getClass()); mongoDBManager = new MongoDBManager(configuration); -// Path indexFile = Paths.get("./cellbase-lib/src/main/resources/mongodb-indexes.json"); - MongoDataStore mongoDBDatastore = mongoDBManager.createMongoDBDatastore(databaseName); mongoDBIndexUtils = new MongoDBIndexUtils(mongoDBDatastore, indexFile); @@ -99,24 +84,46 @@ private void init() { * @param dropIndexesFirst if TRUE, deletes the index before creating a new one. FALSE, no index is created if it * already exists. * @throws IOException if configuration file can't be read + * @throws CellBaseException if DataRelease manager raises an exception */ - @Deprecated - public void createMongoDBIndexes(String data, String dataRelease, boolean dropIndexesFirst) throws IOException { - // InputStream indexResourceStream = getClass().getResourceAsStream("mongodb-indexes.json"); + public void createMongoDBIndexes(String data, int dataRelease, boolean dropIndexesFirst) throws IOException, CellBaseException { + Release release = dataReleaseManager.get(dataRelease); + + List collections = new ArrayList<>(); if (StringUtils.isEmpty(data) || "all".equalsIgnoreCase(data)) { - mongoDBIndexUtils.createAllIndexes(dropIndexesFirst); -// mongoDBIndexUtils.createAllIndexes(mongoDataStore, indexResourceStream, dropIndexesFirst); - logger.info("Loaded all indexes"); + logger.info("Indexing all data ({}) for data release {}", StringUtils.join(release.getCollections().keySet(), ", "), + dataRelease); + for (Map.Entry entry : release.getCollections().entrySet()) { + // Sanity check + if (!entry.getValue().endsWith(DATA_RELEASE_SEPARATOR + dataRelease)) { + throw new CellBaseException("Something wrong when indexing: Collection " + entry.getValue() + " found when indexing" + + " data release " + dataRelease); + } + collections.add(entry.getValue()); + } } else { List dataList = Arrays.asList(data.split(",")); for (String dataName : dataList) { - List collections = new ArrayList<>(); - for (String collection : DATA_COLLECTIONS.get(dataName)) { - collections.add(collection + DATA_RELEASE_SEPARATOR + dataRelease); + if (release.getCollections().containsKey(dataName)) { + collections.add(release.getCollections().get(dataName)); + } else { + throw new CellBaseException("Error indexing: data '" + dataName + "' missing in data release " + dataRelease + + " (" + StringUtils.join(release.getCollections().keySet(), ", ") + ")"); } - createMongoDBIndexes(collections, dropIndexesFirst); } } + + // Remove temporary polygenic score collections + collections.remove(CellBaseDBAdaptor.buildCollectionName(EtlCommons.PGS_COMMON_COLLECTION, dataRelease)); + collections.remove(CellBaseDBAdaptor.buildCollectionName(EtlCommons.PGS_VARIANT_COLLECTION, dataRelease)); + + // Check collection names + if (CollectionUtils.isEmpty(collections)) { + throw new CellBaseException("No collections to index"); + } + + // Create MongoDB indexes + createMongoDBIndexes(collections, dropIndexesFirst); } public void createMongoDBIndexes(List collections, boolean dropIndexesFirst) throws IOException { @@ -154,13 +161,21 @@ public void validateMongoDBIndexes(String collectionName) throws IOException { } } } - + private void createAllIndexes(int dataRelease, boolean dropIndexesFirst) throws IOException { + Map>> indexes = getIndexesFromFile(); + + for (String key : indexes.keySet()) { + String collectionName = buildCollectionName(key, dataRelease); + logger.info("Creating index for collection {}", collectionName); + mongoDBIndexUtils.createIndexes(collectionName, indexes.get(key), dropIndexesFirst); + logger.info("Done."); + } + } private void checkIndexes() throws IOException { if (indexes == null) { indexes = getIndexesFromFile(); } } - private Map>> getIndexesFromFile() throws IOException { ObjectMapper objectMapper = generateDefaultObjectMapper(); Map>> indexes = new HashMap<>(); diff --git a/cellbase-lib/src/main/resources/mongodb-indexes.json b/cellbase-lib/src/main/resources/mongodb-indexes.json index cfc5164e91..f624111f7c 100644 --- a/cellbase-lib/src/main/resources/mongodb-indexes.json +++ b/cellbase-lib/src/main/resources/mongodb-indexes.json @@ -186,12 +186,5 @@ {"collection": "protein_substitution_prediction", "fields": {"transcriptId": 1}, "options": {"background": true}} {"collection": "protein_substitution_prediction", "fields": {"aaPosition": 1}, "options": {"background": true}} -{"collection": "common_polygenic_score", "fields": {"id": 1}, "options": {"background": true}} -{"collection": "common_polygenic_score", "fields": {"name": 1}, "options": {"background": true}} -{"collection": "common_polygenic_score", "fields": {"source": 1}, "options": {"background": true}} -{"collection": "variant_polygenic_score", "fields": {"_chunkIds": 1}, "options": {"background": true}} -{"collection": "variant_polygenic_score", "fields": {"chromosome": 1, "position": 1}, "options": {"background": true}} -{"collection": "variant_polygenic_score", "fields": {"polygenicScores.id": 1}, "options": {"background": true}} - {"collection": "snp", "fields": {"id": 1}, "options": {"background": true}} {"collection": "snp", "fields": {"chromosome": 1, "position": 1, "reference": 1}, "options": {"background": true}} diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/GenericMongoDBAdaptorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/GenericMongoDBAdaptorTest.java index 23c694f7c9..ee7e523c18 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/GenericMongoDBAdaptorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/GenericMongoDBAdaptorTest.java @@ -17,6 +17,7 @@ package org.opencb.cellbase.lib; import org.apache.commons.collections4.CollectionUtils; +import org.junit.Assert; import org.opencb.biodata.models.variant.Variant; import org.opencb.cellbase.core.common.GitRepositoryState; import org.opencb.cellbase.core.config.CellBaseConfiguration; @@ -66,7 +67,7 @@ public class GenericMongoDBAdaptorTest { private static final String LOCALHOST = "localhost:27017"; protected static final String SPECIES = "hsapiens"; protected static final String ASSEMBLY = "grch38"; -// protected static final String API_VERSION = "v5"; + // protected static final String API_VERSION = "v5"; private static final String MONGODB_CELLBASE_LOADER = "org.opencb.cellbase.lib.loader.MongoDBCellBaseLoader"; protected CellBaseConfiguration cellBaseConfiguration; protected CellBaseManagerFactory cellBaseManagerFactory; @@ -90,18 +91,19 @@ public GenericMongoDBAdaptorTest() { GenericMongoDBAdaptorTest.class.getClassLoader().getResourceAsStream("configuration.test.yaml"), CellBaseConfiguration.ConfigurationFileFormat.YAML); - String[] versionSplit = GitRepositoryState.get().getBuildVersion().split("\\."); - cellBaseConfiguration.setVersion("v" + versionSplit[0] + "." + versionSplit[1]); - cellBaseManagerFactory = new CellBaseManagerFactory(cellBaseConfiguration); + String[] versionSplit = GitRepositoryState.get().getBuildVersion().split("\\."); + cellBaseConfiguration.setVersion("v" + versionSplit[0] + "." + versionSplit[1]); + cellBaseManagerFactory = new CellBaseManagerFactory(cellBaseConfiguration); - cellBaseName = DatabaseNameUtils.getDatabaseName(SPECIES, ASSEMBLY, cellBaseConfiguration.getVersion()); + cellBaseName = DatabaseNameUtils.getDatabaseName(SPECIES, ASSEMBLY, cellBaseConfiguration.getVersion()); - loadRunner = new LoadRunner(MONGODB_CELLBASE_LOADER, cellBaseName, 2, - cellBaseManagerFactory.getDataReleaseManager(SPECIES, ASSEMBLY), cellBaseConfiguration); + loadRunner = new LoadRunner(MONGODB_CELLBASE_LOADER, cellBaseName, 2, + cellBaseManagerFactory.getDataReleaseManager(SPECIES, ASSEMBLY), cellBaseConfiguration); initDB(); } catch (Exception e) { e.printStackTrace(); + Assert.fail(); } } diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/ClinicalMongoDBAdaptorTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/ClinicalMongoDBAdaptorTest.java index f5d7bd887b..c70f2611cb 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/ClinicalMongoDBAdaptorTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/ClinicalMongoDBAdaptorTest.java @@ -31,13 +31,18 @@ import org.opencb.cellbase.core.exception.CellBaseException; import org.opencb.cellbase.core.result.CellBaseDataResult; import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest; +import org.opencb.cellbase.lib.loader.LoaderException; import org.opencb.cellbase.lib.managers.ClinicalManager; import org.opencb.commons.datastore.core.QueryOptions; +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.net.URISyntaxException; import java.util.Arrays; import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; +import java.util.concurrent.ExecutionException; import static org.junit.jupiter.api.Assertions.*; diff --git a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/indexer/IndexManagerTest.java b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/indexer/IndexManagerTest.java index 41efe04f95..63917217f1 100644 --- a/cellbase-lib/src/test/java/org/opencb/cellbase/lib/indexer/IndexManagerTest.java +++ b/cellbase-lib/src/test/java/org/opencb/cellbase/lib/indexer/IndexManagerTest.java @@ -10,6 +10,7 @@ import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest; import org.opencb.cellbase.lib.db.MongoDBManager; import org.opencb.cellbase.lib.impl.core.CellBaseDBAdaptor; +import org.opencb.cellbase.lib.managers.DataReleaseManager; import org.opencb.cellbase.lib.managers.GeneManager; import org.opencb.commons.datastore.core.DataResult; import org.opencb.commons.datastore.mongodb.MongoDBCollection; @@ -26,18 +27,40 @@ public class IndexManagerTest extends GenericMongoDBAdaptorTest { + private DataReleaseManager dataReleaseManager; private IndexManager indexManager; - public IndexManagerTest() throws URISyntaxException { + public IndexManagerTest() throws URISyntaxException, CellBaseException { super(); Path path = Paths.get(getClass().getResource("/index/mongodb-indexes.json").toURI()); - indexManager = new IndexManager(cellBaseName, path, cellBaseConfiguration); + dataReleaseManager = new DataReleaseManager(SPECIES, ASSEMBLY, cellBaseConfiguration); + indexManager = new IndexManager(cellBaseName, path, dataReleaseManager, cellBaseConfiguration); } @Test - public void testIndexes() throws IOException, CellBaseException, QueryException, IllegalAccessException { - String collectionName = "gene" + CellBaseDBAdaptor.DATA_RELEASE_SEPARATOR + dataRelease.getRelease(); + public void testIndexesUsingDataName() throws IOException, CellBaseException, QueryException, IllegalAccessException { + indexManager.createMongoDBIndexes("gene", dataRelease.getRelease(), true); + + MongoDBManager mongoDBManager = new MongoDBManager(cellBaseConfiguration); + MongoDataStore mongoDataStore = mongoDBManager.createMongoDBDatastore(SPECIES, ASSEMBLY); + MongoDBCollection mongoDBCollection = mongoDataStore.getCollection(CellBaseDBAdaptor.buildCollectionName("gene", dataRelease.getRelease())); + DataResult index = mongoDBCollection.getIndex(); + assertNotNull(index); + + GeneManager geneManager = cellBaseManagerFactory.getGeneManager(SPECIES, ASSEMBLY); + GeneQuery query = new GeneQuery(); + query.setNames(Collections.singletonList("BRCA1")); + query.setDataRelease(dataRelease.getRelease()); + CellBaseDataResult result = geneManager.search(query); + assertEquals(1, result.getNumResults()); + assertEquals("BRCA1", result.getResults().get(0).getName()); + assertEquals("ENSG00000012048", result.getResults().get(0).getId()); + } + + @Test + public void testIndexesUsingCollectionName() throws IOException, CellBaseException, QueryException, IllegalAccessException { + String collectionName = CellBaseDBAdaptor.buildCollectionName("gene", dataRelease.getRelease()); indexManager.createMongoDBIndexes(Collections.singletonList(collectionName), true); diff --git a/cellbase-lib/src/test/resources/configuration.test.yaml b/cellbase-lib/src/test/resources/configuration.test.yaml index c5dae5d1f3..22dd5d5532 100644 --- a/cellbase-lib/src/test/resources/configuration.test.yaml +++ b/cellbase-lib/src/test/resources/configuration.test.yaml @@ -1,5 +1,5 @@ --- -version: v5 +version: "${CELLBASE.VERSION}" apiVersion: "${project.version}" wiki: https://github.com/opencb/cellbase/wiki maintenanceFlagFile: "/tmp/maintenance" diff --git a/cellbase-lib/src/test/resources/index/mongodb-indexes.json b/cellbase-lib/src/test/resources/index/mongodb-indexes.json index cfc5164e91..f624111f7c 100644 --- a/cellbase-lib/src/test/resources/index/mongodb-indexes.json +++ b/cellbase-lib/src/test/resources/index/mongodb-indexes.json @@ -186,12 +186,5 @@ {"collection": "protein_substitution_prediction", "fields": {"transcriptId": 1}, "options": {"background": true}} {"collection": "protein_substitution_prediction", "fields": {"aaPosition": 1}, "options": {"background": true}} -{"collection": "common_polygenic_score", "fields": {"id": 1}, "options": {"background": true}} -{"collection": "common_polygenic_score", "fields": {"name": 1}, "options": {"background": true}} -{"collection": "common_polygenic_score", "fields": {"source": 1}, "options": {"background": true}} -{"collection": "variant_polygenic_score", "fields": {"_chunkIds": 1}, "options": {"background": true}} -{"collection": "variant_polygenic_score", "fields": {"chromosome": 1, "position": 1}, "options": {"background": true}} -{"collection": "variant_polygenic_score", "fields": {"polygenicScores.id": 1}, "options": {"background": true}} - {"collection": "snp", "fields": {"id": 1}, "options": {"background": true}} {"collection": "snp", "fields": {"chromosome": 1, "position": 1, "reference": 1}, "options": {"background": true}}