Skip to content

Commit 2bf0c48

Browse files
committed
Merge remote-tracking branch 'gitlab/master'
Conflicts: paw
2 parents 5b01f1e + 0b5e546 commit 2bf0c48

10 files changed

Lines changed: 104 additions & 93 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ include(CheckIncludeFileCXX)
77
# The version number
88
set (graphtyper_VERSION_MAJOR 2)
99
set (graphtyper_VERSION_MINOR 7)
10-
set (graphtyper_VERSION_PATCH 5)
10+
set (graphtyper_VERSION_PATCH 6)
1111

1212
# Graphtyper's headers
1313
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include")

include/graphtyper/graph/sv.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ class SV
4444
int32_t length = 0; // Length of alt. allele minus ref. allele
4545
int32_t size = 0; // Total size, i.e. length of larger allele excluding padding base
4646
int32_t end = 0;
47-
int16_t n_clusters = 0;
48-
int16_t num_merged_svs = -1;
47+
int32_t n_clusters = 0;
48+
int32_t num_merged_svs = -1;
4949
int32_t or_start = -1; // Start coordinate of the sequence origin
5050
int32_t or_end = -1; // End coordinate of the sequence origin
5151
int32_t related_sv = -1;

src/graph/constructor.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1283,6 +1283,9 @@ void add_var_record(std::vector<VarRecord> & var_records,
12831283
seqan::StringSet<seqan::CharString> infos;
12841284
seqan::strSplit(infos, vcf_record.info, seqan::EqualsChar<';'>());
12851285

1286+
// Flag for a special case in Dragen 3.7+ files (set when the INFO field has a DUPSVLEN key).
1287+
bool is_a_dup = false;
1288+
12861289
for (auto const & info : infos)
12871290
{
12881291
long const EQ_SIGN_POS = std::distance(seqan::begin(info), std::find(seqan::begin(info), seqan::end(info), '='));
@@ -1292,6 +1295,9 @@ void add_var_record(std::vector<VarRecord> & var_records,
12921295
? std::string(seqan::begin(info) + EQ_SIGN_POS + 1, seqan::end(info))
12931296
: std::string("");
12941297

1298+
if (key == "DUPSVLEN")
1299+
is_a_dup = true;
1300+
12951301
parse_info_sv_type("SVTYPE", key, val, sv.type) || // SVTYPE
12961302
parse_info_int("END", key, val, sv.end) || // END
12971303
parse_info_int("SVSIZE", key, val, sv.size) || // SVSIZE
@@ -1304,6 +1310,7 @@ void add_var_record(std::vector<VarRecord> & var_records,
13041310
parse_info_str("SVINSSEQ", key, val, sv.ins_seq) || // SVINSSEQ
13051311
parse_info_str("LEFT_SVINSSEQ", key, val, sv.ins_seq_left) || // LEFT_SVINSSEQ
13061312
parse_info_str("RIGHT_SVINSSEQ", key, val, sv.ins_seq_right) || // RIGHT_SVINSSEQ
1313+
parse_info_str("DUPSVINSSEQ", key, val, sv.ins_seq) || // DUPSVINSSEQ
13071314
parse_info_inv_type(key, sv.inv_type); // INV3 and INV5
13081315
}
13091316

@@ -1318,6 +1325,9 @@ void add_var_record(std::vector<VarRecord> & var_records,
13181325
std::exit(1);
13191326
}
13201327

1328+
if (sv.type == INS && is_a_dup)
1329+
sv.type = DUP; // special case for Dragen 3.7 files, where small duplications have SVTYPE=INS
1330+
13211331
if (sv.length < 0)
13221332
sv.length = -sv.length; // Make SVLEN positive
13231333

src/main.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -890,6 +890,7 @@ int subcmd_genotype_sv(paw::Parser & parser)
890890
std::string sam{};
891891
std::string sams{};
892892
std::string sv_vcf{};
893+
std::string encoding{"vcf"};
893894
bool force_copy_reference{false};
894895
bool force_no_copy_reference{false};
895896
bool see_advanced_options{false};
@@ -1006,6 +1007,12 @@ int subcmd_genotype_sv(paw::Parser & parser)
10061007
"(0-level) in the header. The byte range of these blocks will also be printed in "
10071008
"${prefix}.samples_byte_range.");
10081009

1010+
parser.parse_advanced_option(encoding, ' ', "encoding", "Select output encoding. Available are: vcf, popvcf");
1011+
1012+
// set default compression level as 9 when popvcf, since level 9 is already very fast anyway in that encoding mode
1013+
if (encoding == "popvcf")
1014+
opts.bgzf_compression_level = 9;
1015+
10091016
// Changed behaviour such that zero qual SVs are not filtered out by default
10101017
if (!force_filter_zero_qual)
10111018
opts.force_no_filter_zero_qual = true;
@@ -1016,6 +1023,7 @@ int subcmd_genotype_sv(paw::Parser & parser)
10161023
parser.finalize();
10171024
setup_logger();
10181025

1026+
opts.encoding = (encoding == "popvcf") ? 'p' : 'v';
10191027
print_log(gyper::log_severity::info, "Running the 'genotype_sv' subcommand.");
10201028

10211029
#ifndef NDEBUG
@@ -1302,20 +1310,31 @@ int subcmd_vcf_concatenate(paw::Parser & parser)
13021310

13031311
int subcmd_vcf_merge(paw::Parser & parser)
13041312
{
1313+
gyper::Options & opts = *(gyper::Options::instance());
1314+
13051315
std::vector<std::string> vcfs;
13061316
std::string output_fn;
13071317
std::string file_list;
1318+
std::string encoding{"vcf"};
13081319
bool is_sv_vcf{false};
13091320

13101321
parser.parse_option(output_fn, 'o', "output", "Output VCF file name.");
13111322
parser.parse_option(file_list, ' ', "file_list", "File containing VCFs to merge.");
13121323
parser.parse_option(is_sv_vcf, ' ', "sv", "Set if the input VCFs were generated from genotype_sv.");
1324+
parser.parse_option(encoding, ' ', "encoding", "Select output encoding. Available are: vcf, popvcf");
1325+
1326+
// set default compression level as 9 when popvcf, since level 9 is already very fast anyway in that encoding mode
1327+
if (encoding == "popvcf")
1328+
opts.bgzf_compression_level = 9;
13131329

13141330
parser.parse_remaining_positional_arguments(vcfs, "vcfs...", "VCFs to merge");
13151331

13161332
parser.finalize();
13171333
setup_logger();
13181334

1335+
opts.encoding = (encoding == "popvcf") ? 'p' : 'v';
1336+
opts.is_on_final_output = true;
1337+
13191338
if (is_sv_vcf)
13201339
gyper::graph.is_sv_graph = true;
13211340

src/typer/caller.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -549,8 +549,14 @@ void run_first_pass(bam1_t * hts_rec,
549549

550550
if (ref_offset >= REF_SIZE)
551551
{
552-
print_log(log_severity::error, __HERE__, " Unexpected ref_offset = ", ref_offset);
553-
std::exit(1);
552+
if (ref_offset > REF_SIZE)
553+
{
554+
print_log(log_severity::warning, __HERE__, " Unexpected ref_offset=", ref_offset, " > REF_SIZE=", REF_SIZE);
555+
}
556+
557+
// ref_offset==REF_SIZE occurs if the first read happens to be on the very last base. In this case it is safe to just
558+
// ignore it.
559+
break;
554560
}
555561

556562
Read read;

src/typer/variant.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1632,9 +1632,9 @@ std::vector<Variant> break_down_variant(Variant && var,
16321632
else if (!is_no_variant_overlapping)
16331633
{
16341634
// Use the skyr
1635-
print_log(log_severity::debug, "Using the skyr");
1635+
print_debug("Using the skyr");
16361636
std::vector<Variant> new_broken_down_vars = break_down_skyr(std::move(var), reach);
1637-
print_log(log_severity::debug, "skyr finished.");
1637+
print_debug("skyr finished.");
16381638

16391639
std::move(new_broken_down_vars.begin(), new_broken_down_vars.end(), std::back_inserter(broken_down_vars));
16401640
}

src/typer/vcf.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1119,8 +1119,9 @@ void Vcf::write_record(Variant const & var,
11191119
assert(new_line_size > old_line_size);
11201120
double const bytes_per_call = static_cast<double>(new_line_size - old_line_size) / static_cast<double>(i + 1);
11211121
double total_bytes_expected = old_line_size + bytes_per_call * static_cast<double>(var.calls.size());
1122+
double constexpr MAX_BYTES{static_cast<double>(std::numeric_limits<int32_t>::max()) * 0.9};
11221123

1123-
if (total_bytes_expected >= 2000000000.0)
1124+
if (total_bytes_expected >= MAX_BYTES)
11241125
{
11251126
print_log(log_severity::warning,
11261127
" Skipping variant with extreme expected line size=",

src/typer/vcf_operations.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
#include <cmath> // sqrt
2-
#include <sstream> // std::ostringstream
3-
#include <string> // std::string
4-
#include <vector> // std::vector
1+
#include <cmath> // sqrt
2+
#include <libgen.h> // dirname
3+
#include <sstream> // std::ostringstream
4+
#include <string> // std::string
5+
#include <sys/stat.h> // mkdir
6+
#include <vector> // std::vector
57

68
#include <graphtyper/graph/absolute_position.hpp>
79
#include <graphtyper/graph/genomic_region.hpp>
@@ -149,6 +151,16 @@ void vcf_merge(std::vector<std::string> & vcfs, std::string const & output)
149151
if (vcfs.size() == 0)
150152
return;
151153

154+
// Create output directory
155+
{
156+
int const l = output.size();
157+
char * c = new char[l + 1];
158+
std::copy(output.begin(), output.end(), c);
159+
c[l] = '\0';
160+
mkdir(dirname(c), 0755);
161+
delete[] c;
162+
}
163+
152164
gyper::Vcf vcf;
153165
vcf.open(READ_MODE, vcfs.at(0));
154166
vcf.read(); // Read the entire file

0 commit comments

Comments
 (0)