Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion deps/gbwtgraph
35 changes: 31 additions & 4 deletions src/subcommand/gbwt_main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ struct GBWTConfig {

// GBZ construction.
bool set_pggname = false;
std::string supergraph_filename;
bool unset_pggname = false;
bool gbz_v1 = false;

Expand Down Expand Up @@ -307,6 +308,7 @@ void help_gbwt(char** argv) {
std::cerr << " --translation FILE write the segment to node translation table to FILE" << std::endl;
std::cerr << " -Z, --gbz-input use GBZ as input GBWT and input graph (one input arg)" << std::endl;
std::cerr << " --set-pggname compute the pggname for the GBZ if not already present" << std::endl;
std::cerr << " --subgraph-of FILE mark the GBZ as a subgraph of the other GBZ in FILE" << std::endl;
std::cerr << " --unset-pggname clear the stored pggname for the GBZ if present" << std::endl;
std::cerr << " -E, --index-paths index the embedded non-alt paths in the graph" << std::endl;
std::cerr << " (requires -x, no input args)" << std::endl;
Expand Down Expand Up @@ -443,8 +445,9 @@ GBWTConfig parse_gbwt_config(int argc, char** argv) {
constexpr int OPT_PATH_FIELDS = 1116;
constexpr int OPT_TRANSLATION = 1117;
constexpr int OPT_SET_PGGNAME = 1118;
constexpr int OPT_UNSET_PGGNAME = 1119;
constexpr int OPT_GAM_FORMAT = 1120;
constexpr int OPT_SUBGRAPH_OF = 1119;
constexpr int OPT_UNSET_PGGNAME = 1120;
constexpr int OPT_GAM_FORMAT = 1121;
constexpr int OPT_CHUNK_SIZE = 1200;
constexpr int OPT_POS_BUFFER = 1201;
constexpr int OPT_THREAD_BUFFER = 1202;
Expand Down Expand Up @@ -512,6 +515,7 @@ GBWTConfig parse_gbwt_config(int argc, char** argv) {
// Input GBWT construction: GBZ
{ "gbz-input", no_argument, 0, 'Z' },
{ "set-pggname", no_argument, 0, OPT_SET_PGGNAME },
{ "subgraph-of", required_argument, 0, OPT_SUBGRAPH_OF },
{ "unset-pggname", no_argument, 0, OPT_UNSET_PGGNAME },

// Input GBWT construction: paths
Expand Down Expand Up @@ -724,6 +728,9 @@ GBWTConfig parse_gbwt_config(int argc, char** argv) {
case OPT_SET_PGGNAME:
config.set_pggname = true;
break;
case OPT_SUBGRAPH_OF:
config.supergraph_filename = require_exists(config.logger, optarg);
break;
case OPT_UNSET_PGGNAME:
config.unset_pggname = true;
break;
Expand Down Expand Up @@ -951,7 +958,7 @@ GBWTConfig parse_gbwt_config(int argc, char** argv) {
//----------------------------------------------------------------------------

void validate_gbwt_config(GBWTConfig& config) {
// We can either write GBWT in SDSL format to a separate file or as part of a GBZ graph.
// We can either write GBWT to a separate file or as part of a GBZ graph.
// However, `--parse-only` uses `gbwt_output` for other purposes.
bool has_gbwt_output =
(!config.gbwt_output.empty() || (!config.graph_output.empty() && !config.parse_only));
Expand Down Expand Up @@ -1351,7 +1358,7 @@ void step_1_build_gbwts(GBWTHandler& gbwts, GraphHandler& graphs, GBWTConfig& co
config.logger.info() << "Input type: GBZ" << std::endl;
}
graphs.load_gbz(gbwts, config);
if (config.set_pggname) {
if (config.set_pggname || !config.supergraph_filename.empty()) {
std::string pggname = graphs.gbz_graph->pggname();
if (pggname.empty()) {
if (config.show_progress) {
Expand All @@ -1363,6 +1370,26 @@ void step_1_build_gbwts(GBWTHandler& gbwts, GraphHandler& graphs, GBWTConfig& co
if (config.show_progress) {
config.logger.info() << "Graph name: " << pggname << std::endl;
}
if (!config.supergraph_filename.empty()) {
gbwtgraph::GraphName supergraph;
try {
gbwt::Tags tags = gbwtgraph::GBZ::simple_sds_load_tags(config.supergraph_filename);
supergraph = gbwtgraph::GraphName(tags);
} catch (const std::runtime_error& e) {
config.logger.error() << "Failed to load supergraph tags from " << config.supergraph_filename << ": " << e.what() << std::endl;
}
if (supergraph.name().empty()) {
config.logger.warn() << "Supergraph " << config.supergraph_filename << " does not have a pggname" << std::endl;
} else {
if (config.show_progress) {
config.logger.info() << "Supergraph name: " << supergraph.name() << std::endl;
}
gbwtgraph::GraphName subgraph = graphs.gbz_graph->graph_name();
subgraph.add_subgraph(subgraph.name(), supergraph.name());
subgraph.add_relationships(supergraph);
subgraph.set_tags(graphs.gbz_graph->tags);
}
}
}
if (config.unset_pggname) {
gbwtgraph::GraphName empty;
Expand Down
14 changes: 11 additions & 3 deletions test/t/37_vg_gbwt.t
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ BASH_TAP_ROOT=../deps/bash-tap

PATH=../bin:$PATH # for vg

plan tests 164
plan tests 168


# Build vg graphs for two chromosomes
Expand Down Expand Up @@ -240,6 +240,16 @@ vg gbwt --set-pggname -Z x.gbz -g x.gbz
is $? 0 "Graph name can be set"
is "$(vg describe x.gbz | grep -c 'pggname =')" 1 "GBZ contains graph name after setting"

# Set graph name and a subgraph relationship
vg gbwt -g supergraph.gbz -G graphs/components_walks.gfa
is $? 0 "GBZ construction for supergraph"
vg chunk --gbz --contig A -x supergraph.gbz
is $? 0 "Graph component extraction"
vg gbwt --subgraph-of supergraph.gbz -g subgraph.gbz -Z chunk_0_component_0.gbz
is $? 0 "Subgraph relationship can be set"
is "$(vg describe subgraph.gbz | grep -c 'subgraph =')" 1 "GBZ contains subgraph relationship"
rm -f supergraph.gbz chunk_0_component_0.gbz subgraph.gbz

# Build and serialize GBZ from VCF
vg gbwt -x x.vg -g x2.gbz -v small/xy2.vcf.gz
is $? 0 "GBZ construction from VCF"
Expand Down Expand Up @@ -430,5 +440,3 @@ vg gbwt -g gfa.gbz -G graphs/gfa_two_part_reference.gfa
is "$(vg paths -M --reference-paths -x gfa.gbz | grep -v "^#" | cut -f4 | grep NO_HAPLOTYPE | wc -l)" "2" "GBZ can represent reference paths without haplotype numbers"

rm -f gfa.gbz


Loading