Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,11 @@ public void execute() {

Path indexFile = Paths.get(this.appHome).resolve("conf").resolve("mongodb-indexes.json");
logger.info("Using index configuration file: {}", indexFile.toAbsolutePath());
IndexManager indexManager = new IndexManager(indexCommandOptions.database, indexFile, configuration);
IndexManager indexManager = new IndexManager(indexCommandOptions.database, indexFile, dataReleaseManager, configuration);
if (indexCommandOptions.validate) {
indexManager.validateMongoDBIndexes(indexCommandOptions.data);
} else {
indexManager.createMongoDBIndexes(indexCommandOptions.data, indexCommandOptions.dataRelease,
indexManager.createMongoDBIndexes(indexCommandOptions.data, Integer.parseInt(indexCommandOptions.dataRelease),
indexCommandOptions.dropIndexesFirst);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ public void execute() throws CellBaseException {
loadRunner = new LoadRunner(loader, database, numThreads, dataReleaseManager, configuration);
if (createIndexes) {
Path indexFile = Paths.get(this.appHome).resolve("conf").resolve("mongodb-indexes.json");
indexManager = new IndexManager(database, indexFile, configuration);
indexManager = new IndexManager(database, indexFile, dataReleaseManager, configuration);
}

for (String data : dataList) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,15 @@
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.MapperFeature;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.cellbase.core.config.CellBaseConfiguration;
import org.opencb.cellbase.core.exception.CellBaseException;
import org.opencb.cellbase.core.models.Release;
import org.opencb.cellbase.lib.EtlCommons;
import org.opencb.cellbase.lib.db.MongoDBManager;
import org.opencb.cellbase.lib.impl.core.CellBaseDBAdaptor;
import org.opencb.cellbase.lib.managers.DataReleaseManager;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.mongodb.MongoDBIndexUtils;
import org.opencb.commons.datastore.mongodb.MongoDataStore;
Expand All @@ -35,10 +41,12 @@
import java.util.*;

import static org.opencb.cellbase.lib.impl.core.CellBaseDBAdaptor.DATA_RELEASE_SEPARATOR;
import static org.opencb.cellbase.lib.impl.core.CellBaseDBAdaptor.buildCollectionName;


public class IndexManager {

private DataReleaseManager dataReleaseManager;
private CellBaseConfiguration configuration;
private Logger logger;
private String databaseName;
Expand All @@ -48,31 +56,10 @@ public class IndexManager {

private Map<String, List<Map<String, ObjectMap>>> indexes;

private static final Map<String, List<String>> DATA_COLLECTIONS = new HashMap<>();

static {
DATA_COLLECTIONS.put("genome", Arrays.asList("genome_info", "genome_sequence"));
DATA_COLLECTIONS.put("conservation", Collections.singletonList("conservation"));
DATA_COLLECTIONS.put("repeats", Collections.singletonList("repeats"));
DATA_COLLECTIONS.put("gene", Arrays.asList("gene", "refseq"));
DATA_COLLECTIONS.put("protein", Collections.singletonList("protein"));
DATA_COLLECTIONS.put("regulation", Arrays.asList("regulatory_region", "regulatory_pfm"));
DATA_COLLECTIONS.put("variation", Collections.singletonList("variation"));
DATA_COLLECTIONS.put("variation_functional_score", Collections.singletonList("missense_variation_functional_score"));
DATA_COLLECTIONS.put("protein_functional_prediction", Collections.singletonList("protein_functional_prediction"));
DATA_COLLECTIONS.put("revel", Collections.singletonList("revel"));
DATA_COLLECTIONS.put("alphamissense", Collections.singletonList("alphamissense"));
DATA_COLLECTIONS.put("clinical_variants", Collections.singletonList("clinical_variants"));
DATA_COLLECTIONS.put("splice_score", Collections.singletonList("splice_score"));
DATA_COLLECTIONS.put("ontology", Collections.singletonList("ontology"));
DATA_COLLECTIONS.put("pubmed", Collections.singletonList("pubmed"));
DATA_COLLECTIONS.put("pharmacogenomics", Collections.singletonList("pharmacogenomics"));
DATA_COLLECTIONS.put("polygenic_score", Arrays.asList("variant_polygenic_score", "common_polygenic_score"));
}

public IndexManager(String databaseName, Path indexFile, CellBaseConfiguration configuration) {
public IndexManager(String databaseName, Path indexFile, DataReleaseManager dataReleaseManager, CellBaseConfiguration configuration) {
this.databaseName = databaseName;
this.indexFile = indexFile;
this.dataReleaseManager = dataReleaseManager;
this.configuration = configuration;

init();
Expand All @@ -82,8 +69,6 @@ private void init() {
logger = LoggerFactory.getLogger(this.getClass());
mongoDBManager = new MongoDBManager(configuration);

// Path indexFile = Paths.get("./cellbase-lib/src/main/resources/mongodb-indexes.json");

MongoDataStore mongoDBDatastore = mongoDBManager.createMongoDBDatastore(databaseName);
mongoDBIndexUtils = new MongoDBIndexUtils(mongoDBDatastore, indexFile);

Expand All @@ -99,24 +84,46 @@ private void init() {
* @param dropIndexesFirst if TRUE, deletes the index before creating a new one. FALSE, no index is created if it
* already exists.
* @throws IOException if configuration file can't be read
* @throws CellBaseException if DataRelease manager raises an exception
*/
@Deprecated
public void createMongoDBIndexes(String data, String dataRelease, boolean dropIndexesFirst) throws IOException {
// InputStream indexResourceStream = getClass().getResourceAsStream("mongodb-indexes.json");
public void createMongoDBIndexes(String data, int dataRelease, boolean dropIndexesFirst) throws IOException, CellBaseException {
Release release = dataReleaseManager.get(dataRelease);

List<String> collections = new ArrayList<>();
if (StringUtils.isEmpty(data) || "all".equalsIgnoreCase(data)) {
mongoDBIndexUtils.createAllIndexes(dropIndexesFirst);
// mongoDBIndexUtils.createAllIndexes(mongoDataStore, indexResourceStream, dropIndexesFirst);
logger.info("Loaded all indexes");
logger.info("Indexing all data ({}) for data release {}", StringUtils.join(release.getCollections().keySet(), ", "),
dataRelease);
for (Map.Entry<String, String> entry : release.getCollections().entrySet()) {
// Sanity check
if (!entry.getValue().endsWith(DATA_RELEASE_SEPARATOR + dataRelease)) {
throw new CellBaseException("Something wrong when indexing: Collection " + entry.getValue() + " found when indexing"
+ " data release " + dataRelease);
}
collections.add(entry.getValue());
}
} else {
List<String> dataList = Arrays.asList(data.split(","));
for (String dataName : dataList) {
List<String> collections = new ArrayList<>();
for (String collection : DATA_COLLECTIONS.get(dataName)) {
collections.add(collection + DATA_RELEASE_SEPARATOR + dataRelease);
if (release.getCollections().containsKey(dataName)) {
collections.add(release.getCollections().get(dataName));
} else {
throw new CellBaseException("Error indexing: data '" + dataName + "' missing in data release " + dataRelease
+ " (" + StringUtils.join(release.getCollections().keySet(), ", ") + ")");
}
createMongoDBIndexes(collections, dropIndexesFirst);
}
}

// Remove temporary polygenic score collections
collections.remove(CellBaseDBAdaptor.buildCollectionName(EtlCommons.PGS_COMMON_COLLECTION, dataRelease));
collections.remove(CellBaseDBAdaptor.buildCollectionName(EtlCommons.PGS_VARIANT_COLLECTION, dataRelease));

// Check collection names
if (CollectionUtils.isEmpty(collections)) {
throw new CellBaseException("No collections to index");
}

// Create MongoDB indexes
createMongoDBIndexes(collections, dropIndexesFirst);
}

public void createMongoDBIndexes(List<String> collections, boolean dropIndexesFirst) throws IOException {
Expand Down Expand Up @@ -154,13 +161,21 @@ public void validateMongoDBIndexes(String collectionName) throws IOException {
}
}
}

/**
 * Creates the indexes for every collection key returned by {@code getIndexesFromFile()}.
 *
 * For each key, the target collection name is built with {@code buildCollectionName(key, dataRelease)}
 * and the index definitions associated with that key are handed to {@code mongoDBIndexUtils.createIndexes}.
 *
 * @param dataRelease      data release number used to build each collection name
 * @param dropIndexesFirst flag forwarded unchanged to {@code mongoDBIndexUtils.createIndexes}
 * @throws IOException propagated from loading the index definitions or from creating the indexes
 */
private void createAllIndexes(int dataRelease, boolean dropIndexesFirst) throws IOException {
    Map<String, List<Map<String, ObjectMap>>> definitionsByKey = getIndexesFromFile();

    // Walk the entries directly so that each key and its definitions are read together.
    for (Map.Entry<String, List<Map<String, ObjectMap>>> definition : definitionsByKey.entrySet()) {
        String targetCollection = buildCollectionName(definition.getKey(), dataRelease);
        logger.info("Creating index for collection {}", targetCollection);
        mongoDBIndexUtils.createIndexes(targetCollection, definition.getValue(), dropIndexesFirst);
        logger.info("Done.");
    }
}
/**
 * Ensures the {@code indexes} field is populated.
 *
 * If the field is still {@code null} it is assigned the result of {@code getIndexesFromFile()};
 * otherwise the existing value is left untouched.
 *
 * @throws IOException propagated from {@code getIndexesFromFile()}
 */
private void checkIndexes() throws IOException {
    if (indexes != null) {
        // Already populated by an earlier call: nothing to do.
        return;
    }
    indexes = getIndexesFromFile();
}

private Map<String, List<Map<String, ObjectMap>>> getIndexesFromFile() throws IOException {
ObjectMapper objectMapper = generateDefaultObjectMapper();
Map<String, List<Map<String, ObjectMap>>> indexes = new HashMap<>();
Expand Down
7 changes: 0 additions & 7 deletions cellbase-lib/src/main/resources/mongodb-indexes.json
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,5 @@
{"collection": "protein_substitution_prediction", "fields": {"transcriptId": 1}, "options": {"background": true}}
{"collection": "protein_substitution_prediction", "fields": {"aaPosition": 1}, "options": {"background": true}}

{"collection": "common_polygenic_score", "fields": {"id": 1}, "options": {"background": true}}
{"collection": "common_polygenic_score", "fields": {"name": 1}, "options": {"background": true}}
{"collection": "common_polygenic_score", "fields": {"source": 1}, "options": {"background": true}}
{"collection": "variant_polygenic_score", "fields": {"_chunkIds": 1}, "options": {"background": true}}
{"collection": "variant_polygenic_score", "fields": {"chromosome": 1, "position": 1}, "options": {"background": true}}
{"collection": "variant_polygenic_score", "fields": {"polygenicScores.id": 1}, "options": {"background": true}}

{"collection": "snp", "fields": {"id": 1}, "options": {"background": true}}
{"collection": "snp", "fields": {"chromosome": 1, "position": 1, "reference": 1}, "options": {"background": true}}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.opencb.cellbase.lib;

import org.apache.commons.collections4.CollectionUtils;
import org.junit.Assert;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.cellbase.core.common.GitRepositoryState;
import org.opencb.cellbase.core.config.CellBaseConfiguration;
Expand Down Expand Up @@ -66,7 +67,7 @@ public class GenericMongoDBAdaptorTest {
private static final String LOCALHOST = "localhost:27017";
protected static final String SPECIES = "hsapiens";
protected static final String ASSEMBLY = "grch38";
// protected static final String API_VERSION = "v5";
// protected static final String API_VERSION = "v5";
private static final String MONGODB_CELLBASE_LOADER = "org.opencb.cellbase.lib.loader.MongoDBCellBaseLoader";
protected CellBaseConfiguration cellBaseConfiguration;
protected CellBaseManagerFactory cellBaseManagerFactory;
Expand All @@ -90,18 +91,19 @@ public GenericMongoDBAdaptorTest() {
GenericMongoDBAdaptorTest.class.getClassLoader().getResourceAsStream("configuration.test.yaml"),
CellBaseConfiguration.ConfigurationFileFormat.YAML);

String[] versionSplit = GitRepositoryState.get().getBuildVersion().split("\\.");
cellBaseConfiguration.setVersion("v" + versionSplit[0] + "." + versionSplit[1]);
cellBaseManagerFactory = new CellBaseManagerFactory(cellBaseConfiguration);
String[] versionSplit = GitRepositoryState.get().getBuildVersion().split("\\.");
cellBaseConfiguration.setVersion("v" + versionSplit[0] + "." + versionSplit[1]);
cellBaseManagerFactory = new CellBaseManagerFactory(cellBaseConfiguration);

cellBaseName = DatabaseNameUtils.getDatabaseName(SPECIES, ASSEMBLY, cellBaseConfiguration.getVersion());
cellBaseName = DatabaseNameUtils.getDatabaseName(SPECIES, ASSEMBLY, cellBaseConfiguration.getVersion());

loadRunner = new LoadRunner(MONGODB_CELLBASE_LOADER, cellBaseName, 2,
cellBaseManagerFactory.getDataReleaseManager(SPECIES, ASSEMBLY), cellBaseConfiguration);
loadRunner = new LoadRunner(MONGODB_CELLBASE_LOADER, cellBaseName, 2,
cellBaseManagerFactory.getDataReleaseManager(SPECIES, ASSEMBLY), cellBaseConfiguration);

initDB();
} catch (Exception e) {
e.printStackTrace();
Assert.fail();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,18 @@
import org.opencb.cellbase.core.exception.CellBaseException;
import org.opencb.cellbase.core.result.CellBaseDataResult;
import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest;
import org.opencb.cellbase.lib.loader.LoaderException;
import org.opencb.cellbase.lib.managers.ClinicalManager;
import org.opencb.commons.datastore.core.QueryOptions;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.concurrent.ExecutionException;

import static org.junit.jupiter.api.Assertions.*;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest;
import org.opencb.cellbase.lib.db.MongoDBManager;
import org.opencb.cellbase.lib.impl.core.CellBaseDBAdaptor;
import org.opencb.cellbase.lib.managers.DataReleaseManager;
import org.opencb.cellbase.lib.managers.GeneManager;
import org.opencb.commons.datastore.core.DataResult;
import org.opencb.commons.datastore.mongodb.MongoDBCollection;
Expand All @@ -26,18 +27,40 @@

public class IndexManagerTest extends GenericMongoDBAdaptorTest {

private DataReleaseManager dataReleaseManager;
private IndexManager indexManager;

public IndexManagerTest() throws URISyntaxException {
public IndexManagerTest() throws URISyntaxException, CellBaseException {
super();

Path path = Paths.get(getClass().getResource("/index/mongodb-indexes.json").toURI());
indexManager = new IndexManager(cellBaseName, path, cellBaseConfiguration);
dataReleaseManager = new DataReleaseManager(SPECIES, ASSEMBLY, cellBaseConfiguration);
indexManager = new IndexManager(cellBaseName, path, dataReleaseManager, cellBaseConfiguration);
}

@Test
public void testIndexes() throws IOException, CellBaseException, QueryException, IllegalAccessException {
String collectionName = "gene" + CellBaseDBAdaptor.DATA_RELEASE_SEPARATOR + dataRelease.getRelease();
public void testIndexesUsingDataName() throws IOException, CellBaseException, QueryException, IllegalAccessException {
indexManager.createMongoDBIndexes("gene", dataRelease.getRelease(), true);

MongoDBManager mongoDBManager = new MongoDBManager(cellBaseConfiguration);
MongoDataStore mongoDataStore = mongoDBManager.createMongoDBDatastore(SPECIES, ASSEMBLY);
MongoDBCollection mongoDBCollection = mongoDataStore.getCollection(CellBaseDBAdaptor.buildCollectionName("gene", dataRelease.getRelease()));
DataResult<Document> index = mongoDBCollection.getIndex();
assertNotNull(index);

GeneManager geneManager = cellBaseManagerFactory.getGeneManager(SPECIES, ASSEMBLY);
GeneQuery query = new GeneQuery();
query.setNames(Collections.singletonList("BRCA1"));
query.setDataRelease(dataRelease.getRelease());
CellBaseDataResult<Gene> result = geneManager.search(query);
assertEquals(1, result.getNumResults());
assertEquals("BRCA1", result.getResults().get(0).getName());
assertEquals("ENSG00000012048", result.getResults().get(0).getId());
}

@Test
public void testIndexesUsingCollectionName() throws IOException, CellBaseException, QueryException, IllegalAccessException {
String collectionName = CellBaseDBAdaptor.buildCollectionName("gene", dataRelease.getRelease());

indexManager.createMongoDBIndexes(Collections.singletonList(collectionName), true);

Expand Down
2 changes: 1 addition & 1 deletion cellbase-lib/src/test/resources/configuration.test.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
---
version: v5
version: "${CELLBASE.VERSION}"
apiVersion: "${project.version}"
wiki: https://github.com/opencb/cellbase/wiki
maintenanceFlagFile: "/tmp/maintenance"
Expand Down
7 changes: 0 additions & 7 deletions cellbase-lib/src/test/resources/index/mongodb-indexes.json
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,5 @@
{"collection": "protein_substitution_prediction", "fields": {"transcriptId": 1}, "options": {"background": true}}
{"collection": "protein_substitution_prediction", "fields": {"aaPosition": 1}, "options": {"background": true}}

{"collection": "common_polygenic_score", "fields": {"id": 1}, "options": {"background": true}}
{"collection": "common_polygenic_score", "fields": {"name": 1}, "options": {"background": true}}
{"collection": "common_polygenic_score", "fields": {"source": 1}, "options": {"background": true}}
{"collection": "variant_polygenic_score", "fields": {"_chunkIds": 1}, "options": {"background": true}}
{"collection": "variant_polygenic_score", "fields": {"chromosome": 1, "position": 1}, "options": {"background": true}}
{"collection": "variant_polygenic_score", "fields": {"polygenicScores.id": 1}, "options": {"background": true}}

{"collection": "snp", "fields": {"id": 1}, "options": {"background": true}}
{"collection": "snp", "fields": {"chromosome": 1, "position": 1, "reference": 1}, "options": {"background": true}}