Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 64 additions & 13 deletions babs/bootstrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,10 +227,43 @@ def babs_bootstrap(
)
# into `analysis/containers` folder

# Create initial container for sanity check
container = Container(container_ds, container_name, container_config)
# Discover container image path from the container dataset:
containers_path = op.join(self.analysis_path, 'containers')
result = dlapi.containers_list(dataset=containers_path, result_renderer='disabled')
container_info = [
r for r in result if r['action'] == 'containers' and r['name'] == container_name
]
if not container_info:
available = [r['name'] for r in result if r['action'] == 'containers']
raise ValueError(
f"Container '{container_name}' not found in container dataset. "
f'Available: {available}'
)
image_path_in_ds = op.relpath(container_info[0]['path'], containers_path)
container_image_path = op.join('containers', image_path_in_ds)

# Build call_fmt from user's singularity_args:
with open(container_config) as f:
user_config = yaml.safe_load(f)
singularity_args = user_config.get('singularity_args', [])
singularity_args_str = ' '.join(singularity_args) if singularity_args else ''
call_fmt = f'singularity run -B $PWD --pwd $PWD {singularity_args_str} {{img}} {{cmd}}'

# Register container at analysis level so datalad containers-run works:
print(f'\nRegistering container at analysis level: {container_image_path}')
dlapi.containers_add(
dataset=self.analysis_path,
name=container_name,
image=container_image_path,
call_fmt=call_fmt,
)

# sanity check of container ds:
container = Container(
container_ds,
container_name,
container_config,
container_image_path=container_image_path,
)
container.sanity_check(self.analysis_path)

# ==============================================================
Expand All @@ -254,9 +287,18 @@ def babs_bootstrap(
container = containers[0]
else:
self._bootstrap_single_app_scripts(
container_ds, container_name, container_config, system
container_ds,
container_name,
container_config,
system,
container_image_path=container_image_path,
)
container = Container(
container_ds,
container_name,
container_config,
container_image_path=container_image_path,
)
container = Container(container_ds, container_name, container_config)

# Copy in any other files needed:
self._init_import_files(container.config.get('imported_files', []))
Expand Down Expand Up @@ -394,21 +436,30 @@ def babs_bootstrap(
print('`babs init` was successful!')

def _bootstrap_single_app_scripts(
self, container_ds, container_name, container_config, system
self,
container_ds,
container_name,
container_config,
system,
container_image_path=None,
):
"""Bootstrap scripts for single BIDS app configuration."""
container = Container(container_ds, container_name, container_config)
container = Container(
container_ds,
container_name,
container_config,
container_image_path=container_image_path,
)

# Generate `<containerName>_zip.sh`: ----------------------------------
# which is a bash script of singularity run + zip
# in folder: `analysis/code`
print('\nGenerating a bash script for running container and zipping the outputs...')
print('This bash script will be named as `' + container_name + '_zip.sh`')
# Zip-only script (container execution is now handled by containers-run
# in participant_job.sh)
print('\nGenerating zip script: ' + container_name + '_zip.sh')
bash_path = op.join(self.analysis_path, 'code', container_name + '_zip.sh')
container.generate_bash_run_bidsapp(bash_path, self.input_datasets, self.processing_level)
container.generate_bash_zip_outputs(bash_path, self.processing_level)
self.datalad_save(
path='code/' + container_name + '_zip.sh',
message='Generate script of running container',
message='Generate zip script',
)

# make another folder within `code` for test jobs:
Expand Down
54 changes: 48 additions & 6 deletions babs/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,19 @@
import yaml
from jinja2 import Environment, PackageLoader, StrictUndefined

from babs.generate_bidsapp_runscript import generate_bidsapp_runscript
from babs.generate_bidsapp_runscript import (
bids_app_args_from_config,
generate_bidsapp_runscript,
get_output_zipping_cmds,
)
from babs.generate_submit_script import generate_submit_script, generate_test_submit_script
from babs.utils import app_output_settings_from_config


class Container:
"""This class is for the BIDS App Container"""

def __init__(self, container_ds, container_name, config_yaml_file):
def __init__(self, container_ds, container_name, config_yaml_file, container_image_path=None):
"""
This is to initialize Container class.

Expand Down Expand Up @@ -67,9 +71,7 @@ def __init__(self, container_ds, container_name, config_yaml_file):
with open(self.config_yaml_file) as f:
self.config = yaml.safe_load(f)

self.container_path_relToAnalysis = op.join(
'containers', '.datalad', 'environments', self.container_name, 'image'
)
self.container_path_relToAnalysis = container_image_path

def sanity_check(self, analysis_path):
"""
Expand Down Expand Up @@ -101,6 +103,28 @@ def sanity_check(self, analysis_path):
+ "'."
)

def generate_bash_zip_outputs(self, bash_path, processing_level):
    """Write a bash script that zips the BIDS App output folders.

    Parameters
    ----------
    bash_path : str
        Destination path of the generated shell script.
    processing_level : {'subject', 'session'}
        Passed through to the template; controls whether the script
        expects a session ID argument in addition to the subject ID.
    """
    # Derive the folders-to-zip mapping from the container config, then
    # build the matching shell zip commands for the requested level.
    zip_foldernames, _ = app_output_settings_from_config(self.config)
    zip_cmds = get_output_zipping_cmds(zip_foldernames, processing_level)

    # StrictUndefined makes the render fail loudly on any missing variable.
    jinja_env = Environment(
        loader=PackageLoader('babs', 'templates'),
        trim_blocks=True,
        lstrip_blocks=True,
        autoescape=False,
        undefined=StrictUndefined,
    )
    rendered = jinja_env.get_template('zip_outputs.sh.jinja2').render(
        processing_level=processing_level,
        cmd_zip=zip_cmds,
    )

    with open(bash_path, 'w') as script_file:
        script_file.write(rendered)
    # Owner-only rwx so the generated script is directly executable.
    os.chmod(bash_path, 0o700)

def generate_bash_run_bidsapp(self, bash_path, input_ds, processing_level):
"""
This is to generate a bash script that runs the BIDS App singularity image.
Expand Down Expand Up @@ -165,16 +189,34 @@ def generate_bash_participant_job(
Shown in the script error message when PROJECT_ROOT is unset.
"""

input_datasets = input_ds.as_records()
_, bids_app_output_dir = app_output_settings_from_config(self.config)

raw_bids_app_args = self.config.get('bids_app_args', None)
if raw_bids_app_args:
bids_app_args, subject_selection_flag, _, _, bids_app_input_dir = (
bids_app_args_from_config(raw_bids_app_args, input_datasets)
)
else:
bids_app_args = []
subject_selection_flag = '--participant-label'
bids_app_input_dir = input_datasets[0]['unzipped_path_containing_subject_dirs']

script_content = generate_submit_script(
queue_system=system.type,
cluster_resources_config=self.config['cluster_resources'],
script_preamble=self.config['script_preamble'],
job_scratch_directory=self.config['job_compute_space'],
input_datasets=input_ds.as_records(),
input_datasets=input_datasets,
processing_level=processing_level,
container_name=self.container_name,
zip_foldernames=self.config['zip_foldernames'],
project_root=project_root,
container_image_path=self.container_path_relToAnalysis,
bids_app_args=bids_app_args,
bids_app_input_dir=bids_app_input_dir,
bids_app_output_dir=bids_app_output_dir,
subject_selection_flag=subject_selection_flag,
)

with open(bash_path, 'w') as f:
Expand Down
10 changes: 10 additions & 0 deletions babs/generate_submit_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ def generate_submit_script(
container_images=None,
datalad_run_message=None,
project_root=None,
container_image_path=None,
bids_app_args=None,
bids_app_input_dir=None,
bids_app_output_dir=None,
subject_selection_flag=None,
):
"""
Generate a bash script that runs the BIDS App singularity image.
Expand Down Expand Up @@ -122,6 +127,11 @@ def generate_submit_script(
container_images=container_images,
datalad_run_message=datalad_run_message,
project_root=project_root,
container_image_path=container_image_path,
bids_app_args=bids_app_args or [],
bids_app_input_dir=bids_app_input_dir or '',
bids_app_output_dir=bids_app_output_dir or '',
subject_selection_flag=subject_selection_flag or '',
)


Expand Down
53 changes: 35 additions & 18 deletions babs/templates/participant_job.sh.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ if ! git sparse-checkout init --cone; then
fi

git sparse-checkout set \
.datalad \
code \
containers \
{% for input_dataset in input_datasets %}
Expand Down Expand Up @@ -107,9 +108,8 @@ fi
{{ zip_locator_text }}

# Link to shared container image so each job does not re-clone the same image.
# If shared path is not available (e.g. Slurm Docker workers), retrieve image in this clone.
CONTAINER_SHARED="${PROJECT_ROOT}/analysis/containers/.datalad/environments/{{ container_name }}/image"
CONTAINER_JOB="containers/.datalad/environments/{{ container_name }}/image"
CONTAINER_SHARED="${PROJECT_ROOT}/analysis/{{ container_image_path }}"
CONTAINER_JOB="{{ container_image_path }}"

if [ ! -L "${CONTAINER_SHARED}" ]; then
echo "ERROR: shared container image not found at ${CONTAINER_SHARED}" >&2
Expand All @@ -125,35 +125,52 @@ if [ ! -L "${CONTAINER_JOB}" ]; then
exit 1
fi

# datalad run:
datalad run \
-i "{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }}" \
# Step 1: Run BIDS app via containers-run
datalad containers-run \
-n {{ container_name }} \
--explicit \
{% for input_dataset in input_datasets %}
{% if not input_dataset['is_zipped'] %}
-i "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}" \
-i "{{ input_dataset['path_in_babs'] }}/dataset_description.json" \
--input "{{ input_dataset['unzipped_path_containing_subject_dirs'] }}/{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}/{% raw %}${sesid}{% endraw %}{% endif %}" \
--input "{{ input_dataset['path_in_babs'] }}/dataset_description.json" \
{% else %}
-i "${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}" \
--input "${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}" \
{% endif %}
{% endfor %}
{% if container_images %}
{% for image_path in container_images %}
-i "{{ image_path }}" \
{% endfor %}
{% else %}
-i "containers/.datalad/environments/{{container_name}}/image" \
{% endif %}
{% if datalad_expand_inputs %}
--expand inputs \
{% endif %}
--output "{{ bids_app_output_dir }}" \
-m "{{ container_name }} {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}" \
-- \
{{ bids_app_input_dir }} \
{{ bids_app_output_dir }} \
participant \
{% for bids_app_arg in bids_app_args %}
{% if bids_app_arg %}
{{ bids_app_arg }} \
{% endif %}
{% endfor %}
{{ subject_selection_flag }} "{% raw %}${subid}{% endraw %}"

# Unlock outputs so zip gets real content, not annex symlinks
datalad unlock {{ bids_app_output_dir }}

# Step 2: Zip outputs
datalad run \
--explicit \
{% if zip_foldernames is not none %}
{% for key, value in zip_foldernames.items() %}
-o "{% raw %}${subid}{% endraw %}{% if processing_level == 'session' %}_{% raw %}${sesid}{% endraw %}{% endif %}_{{ key }}-{{ value }}.zip" \
{% endfor %}
{% endif %}
-m "{{ (datalad_run_message if datalad_run_message is defined and datalad_run_message else container_name) }} {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}" \
"bash ./{{ run_script_relpath if run_script_relpath else 'code/' + container_name + '_zip.sh' }} {% raw %}${subid}{% endraw %} {% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}{% for input_dataset in input_datasets %}{% if input_dataset['is_zipped'] %} ${%raw%}{{%endraw%}{{ input_dataset['name'].upper() }}_ZIP{%raw%}}{%endraw%}{%endif%}{%endfor%}"
-m "Zip {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %}" \
-- \
bash ./code/{{ container_name }}_zip.sh "{% raw %}${subid}{% endraw %}"{% if processing_level == 'session' %} "{% raw %}${sesid}{% endraw %}"{% endif %}

# Step 3: Remove raw outputs (datalad run --explicit doesn't track deletions)
git rm -rf --sparse {{ bids_app_output_dir }}
git commit -m "Remove raw outputs for {% raw %}${subid}{% endraw %}{% if processing_level == 'session' %} {% raw %}${sesid}{% endraw %}{% endif %} (zipped)"

# Finish up:
# push result file content to output RIA storage:
Expand Down
9 changes: 9 additions & 0 deletions babs/templates/zip_outputs.sh.jinja2
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Zip-only helper script rendered from zip_outputs.sh.jinja2.
# Invoked as: <container_name>_zip.sh <subid> [<sesid>]
# The BIDS App itself is executed separately; this script only packages
# its outputs into zip files via the injected zip commands below.
set -e -u -x

# First positional argument: subject ID.
subid="$1"
{% if processing_level == 'session' %}
# Session-level processing also receives the session ID as argument 2.
sesid="$2"
{% endif %}

# Zip command(s) produced by the generator (get_output_zipping_cmds).
{{ cmd_zip }}
Loading
Loading