-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathSnakefile_learn
More file actions
executable file
·142 lines (124 loc) · 4.18 KB
/
Snakefile_learn
File metadata and controls
executable file
·142 lines (124 loc) · 4.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
__author__ = 'Frederic Escudie'
__copyright__ = 'Copyright (C) 2022 CHU Toulouse'
__license__ = 'GNU General Public License'
__version__ = '1.0.0'
import re
import glob
from time import strftime, gmtime
########################################################################
#
# Functions
#
########################################################################
def splFromPattern(pattern, excluded=None):
    """
    Return the names of the samples whose files match a path pattern.

    :param pattern: Path pattern containing the placeholder "{sample}" (e.g. "raw/{sample}_R1.fastq.gz").
    :type pattern: str
    :param excluded: Names of the samples to remove from the result.
    :type excluded: list
    :return: Names of the samples found on the file system, minus the excluded ones.
    :rtype: set
    """
    excluded = list() if excluded is None else excluded
    # The text around the placeholder is literal: escape it so that characters
    # like ".", "+", "(" or "\" in the path are not read as regex operators.
    literal_parts = [re.escape(part) for part in pattern.split("{sample}")]
    spl_regex = re.compile("^" + "(.+)".join(literal_parts) + "$")
    samples_names = set()
    for spl_path in glob.glob(pattern.replace("{sample}", "*")):
        match = spl_regex.match(spl_path)
        # glob's "*" also accepts an empty name whereas a sample name needs at
        # least one character: such paths are skipped instead of crashing.
        if match is not None:
            samples_names.add(match.group(1))
    return samples_names - set(excluded)
def getLogMessage(wf_name, msg, log_level="INFO"):
    """
    Build a timestamped log line for the workflow.

    The line has the layout "<date> <time> - <workflow> [<level>] <message>".

    :param wf_name: Name of the workflow.
    :type wf_name: str
    :param msg: Message content.
    :type msg: str
    :param log_level: Logging level.
    :type log_level: str
    :return: Printable log message.
    :rtype: str
    """
    # gmtime() without argument is the current time expressed in UTC.
    timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime())
    return f"{timestamp} - {wf_name} [{log_level}] {msg}"
########################################################################
#
# Logging
#
########################################################################
# Identity of the workflow used as prefix in every log line.
wf_name = "MInITI_learn"
wf_version = __version__

# The messages below embed ANSI escape sequences to color the status word.

# Handler for the start of the workflow.
onstart:
    print(getLogMessage(
        wf_name=wf_name,
        msg="\033[34mStart\033[0m workflow on version: {}".format(wf_version)
    ))

# Handler for a workflow ended without error.
onsuccess:
    print(getLogMessage(
        wf_name=wf_name,
        msg="Workflow completed \033[92msuccessfully\033[0m"
    ))

# Handler for a workflow ended on error: logged with the level ERROR.
onerror:
    print(getLogMessage(
        wf_name=wf_name,
        msg="Execution \033[91mfailure\033[0m",
        log_level="ERROR"
    ))
########################################################################
#
# Parameters
#
########################################################################
# Samples are discovered from the alignment files when "aln_pattern" is set in
# the "input" section of the configuration, otherwise from the R1 reads files.
aln_pattern = config.get("input").get("aln_pattern")
samples_names = splFromPattern(
    config.get("input")["R1_pattern"] if aln_pattern is None else aln_pattern,
    config.get("input").get("excluded_samples")
)
# The workflow cannot do anything without at least one sample.
if not samples_names:
    raise Exception("No sample can be found from the input parameters.")
########################################################################
#
# Process
#
########################################################################
include: "rules/all_learn.smk"


# Target of the workflow: the model file.
rule all:
    input:
        "microsat/microsatModel.json"


# Alignment
# NOTE(review): bwa_mem() and markDuplicates() are not defined in this file,
# presumably they come from rules/all_learn.smk - confirm.
if aln_pattern is None:
    # No alignment file was provided: alignments are written in aln/.
    aln_pattern = "aln/{sample}.bam"
    bwa_mem(
        in_reads=[
            config.get("input")["R1_pattern"],
            config.get("input")["R2_pattern"]
        ],
        in_reference_seq=config.get("reference")["sequences"],
        out_alignments="{}.tmp".format(aln_pattern)
    )
    # The temporary alignments are the input of the duplicates marking, which
    # writes the final alignments file.
    markDuplicates(
        in_alignments="{}.tmp".format(aln_pattern),
        out_alignments=aln_pattern,
        out_metrics="aln/{sample}_markDup.tsv",
        out_stderr="logs/{sample}_markDup_stderr.txt",
    )
# Create model
# NOTE(review): microsatLenDistrib(), microsatStatusToAnnot() and
# microsatCreateModel() are not defined in this file, presumably they come from
# rules/all_learn.smk - confirm.

# Parameters of the count step, read from classifier > locus > count in the configuration.
cfg_clf_ct = config.get("classifier").get("locus").get("count")
# NOTE(review): no output path is given here while microsatCreateModel() reads
# microsat/{sample}_microsatLenDistrib.json - presumably the default output of
# this step, confirm.
microsatLenDistrib(
    in_alignments=aln_pattern,
    in_microsatellites=config.get("reference")["microsatellites"],
    params_keep_duplicates=cfg_clf_ct["keep_duplicates"],
    params_method_name="model",
    params_padding=cfg_clf_ct["padding"],
    params_stitch_count=cfg_clf_ct["stitch"]
)
microsatStatusToAnnot(
    in_loci_status=config.get("input")["known_status"],
    in_microsatellites=config.get("reference")["microsatellites"],
    out_loci_status="microsat/modelStatus.tsv",
    params_locus_id=False
)
microsatCreateModel(
    # samples_names is a set: its iteration order changes between executions
    # (string hash randomisation). Sorting gives the same inputs order for
    # every run of the workflow.
    in_length_distributions=expand("microsat/{sample}_microsatLenDistrib.json", sample=sorted(samples_names)),
    in_loci_status="microsat/modelStatus.tsv",
    in_microsatellites=config.get("reference")["microsatellites"],
    out_model="microsat/microsatModel.json",
    params_min_support=config.get("classifier")["locus"]["min_support"],
    params_peak_height_cutoff=config.get("classifier")["locus"]["msings"]["peak_height_cutoff"],
    params_keep_outputs=True
)
# Analysis report
# wfReport()