Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 0 additions & 23 deletions eva_sub_cli/file_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import glob
import gzip
import os
import shutil
import time
from itertools import groupby

Expand Down Expand Up @@ -132,28 +131,6 @@ def _assess_vcf_evidence_type_manual(vcf_file):
open_file.close()


def backup_file_or_directory(file_name, max_backups=None):
    """
    Rename a file or directory by adding a '.1' at the end. If the '.1' backup already exists it is moved to '.2',
    the '.2' to '.3', and so on.
    Keep at most the specified number of backups; if None (or 0) all backups are kept.

    :param file_name: path of the file or directory to back up
    :param max_backups: maximum number of numbered backups to retain, or None to retain all of them
    :raises FileNotFoundError: if file_name does not exist; raised before any existing backup is modified
    """
    # Fail up front: otherwise the existing backups would be rotated (or deleted) before the final rename
    # discovers that there is nothing to back up.
    if not os.path.lexists(file_name):
        raise FileNotFoundError(f'Cannot back up {file_name}: no such file or directory')

    # Find the first numeric suffix that is not in use yet.
    suffix = 1
    while os.path.exists(f'{file_name}.{suffix}'):
        suffix += 1

    # Walk from the oldest backup down to '.1' so every rename targets a name that has just been freed.
    for i in range(suffix, 1, -1):
        previous_backup = f'{file_name}.{i - 1}'
        if max_backups and i > max_backups:
            # Pick the removal call from the backup itself rather than from file_name: the two may differ in type.
            if os.path.isdir(previous_backup) and not os.path.islink(previous_backup):
                shutil.rmtree(previous_backup)
            else:
                os.remove(previous_backup)
        else:
            os.rename(previous_backup, f'{file_name}.{i}')
    os.rename(file_name, f'{file_name}.1')


def open_gzip_if_required(input_file, mode='r'):
"""Open a file in read mode using gzip if the file extension says .gz"""
if input_file.endswith('.gz'):
Expand Down
2 changes: 2 additions & 0 deletions eva_sub_cli/nextflow/validation.nf
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ process check_vcf_valid {
"""
trap 'if [[ \$? == 1 ]]; then exit 0; fi' EXIT

rm -rf ${output_dir}/vcf_format
mkdir -p vcf_format
$params.executable.vcf_validator -i $vcf -r text -o vcf_format --require-evidence > vcf_format/${vcf}.vcf_format.log 2>&1
"""
Expand Down Expand Up @@ -206,6 +207,7 @@ process check_vcf_reference {
"""
trap 'if [[ \$? == 1 || \$? == 139 ]]; then exit 0; fi' EXIT

rm -rf ${output_dir}/assembly_check
mkdir -p assembly_check
$params.executable.vcf_assembly_checker -i $vcf -f $fasta $report_opt -r summary,text -o assembly_check --require-genbank > assembly_check/${vcf}.assembly_check.log 2>&1
"""
Expand Down
17 changes: 7 additions & 10 deletions eva_sub_cli/validators/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
import yaml
from ebi_eva_common_pyutils.command_utils import run_command_with_output
from ebi_eva_common_pyutils.config import WritableConfig
from ebi_eva_common_pyutils.logger import logging_config, AppLogger
from ebi_eva_common_pyutils.logger import AppLogger
from packaging import version

import eva_sub_cli
from eva_sub_cli import ETC_DIR, SUB_CLI_CONFIG_FILE, __version__
from eva_sub_cli.file_utils import backup_file_or_directory, resolve_single_file_path
from eva_sub_cli.file_utils import resolve_single_file_path
from eva_sub_cli.metadata import EvaMetadataJson
from eva_sub_cli.report import generate_html_report, generate_text_report
from eva_sub_cli.validators.validation_results_parsers import parse_assembly_check_log, parse_assembly_check_report, \
Expand Down Expand Up @@ -43,7 +43,6 @@
PASS = 'pass'



class Validator(AppLogger):

def __init__(self, mapping_file, submission_dir, project_title=None, metadata_json=None, metadata_xlsx=None,
Expand Down Expand Up @@ -130,16 +129,15 @@ def _validate(self):
raise NotImplementedError

def set_up_output_dir(self):
if os.path.exists(self.output_dir):
backup_file_or_directory(self.output_dir, max_backups=9)
os.makedirs(self.output_dir, exist_ok=True)
if not os.path.exists(self.output_dir):
Comment thread
apriltuesday marked this conversation as resolved.
os.makedirs(self.output_dir, exist_ok=True)

def clean_up_output_dir(self):
# Move intermediate validation outputs into a subdir except metadata.json
subdir = os.path.join(self.output_dir, 'other_validations')
os.mkdir(subdir)
os.makedirs(subdir, exist_ok=True)
for file_name in os.listdir(self.output_dir):
if file_name == 'metadata.json':
if file_name in ['metadata.json', 'report.txt', 'report.html']:
continue
file_path = os.path.join(self.output_dir, file_name)
if os.path.isfile(file_path):
Expand Down Expand Up @@ -210,7 +208,6 @@ def update_previous_version_results(self):
del self.results[SHALLOW_VALIDATION]['required']
del self.results[SHALLOW_VALIDATION]['requested']


def _collect_validation_workflow_results(self):
# Collect information from the output and summarise in the config
if self.shallow_validation:
Expand Down Expand Up @@ -550,7 +547,7 @@ def _collect_file_info_to_metadata(self):
file_path_2_file_size[vcf_file] = file_size
file_name_2_file_size[os.path.basename(vcf_file)] = file_size
else:
error_txt = f"Cannot locate file_info.txt at {os.path.join(self.output_dir, 'other_validations', 'file_info.txt')}"
error_txt = f"Cannot locate file_info.txt at {os.path.join(self.output_dir, 'other_validations', 'file_info.txt')}"
self.error(error_txt)
raise FileNotFoundError(error_txt)

Expand Down
33 changes: 1 addition & 32 deletions tests/test_file_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import glob
import os
import shutil
import time
from pathlib import Path
from unittest import TestCase

from eva_sub_cli.file_utils import backup_file_or_directory, DirLock, DirLockError
from eva_sub_cli.file_utils import DirLock, DirLockError


def set_up_test_dir():
Expand All @@ -18,36 +17,6 @@ def clean_up():
shutil.rmtree(file_name)


def test_backup_file_or_directory():
    """Backing up 'backup_test' moves it, together with its contents, to 'backup_test.1'."""
    set_up_test_dir()
    try:
        backup_file_or_directory('backup_test')
        assert not os.path.exists('backup_test')
        assert os.path.exists('backup_test.1/nested/dir')
        assert os.path.exists('backup_test.1/file.txt')
    finally:
        # Run the cleanup even when an assertion fails so leftover fixtures cannot affect later tests.
        clean_up()


def test_backup_file_or_directory_max_backups():
    """With max_backups set, repeated backups never leave more than that many numbered copies."""
    max_backups = 2

    try:
        # Backup directory: back up more times than the limit allows to force the oldest copies out.
        for _ in range(max_backups + 2):
            set_up_test_dir()
            backup_file_or_directory('backup_test', max_backups=max_backups)
        for suffix in range(1, max_backups + 1):
            assert os.path.exists(f'backup_test.{suffix}')
        assert not os.path.exists(f'backup_test.{max_backups + 1}')

        # Backup file: same scenario for a regular file inside the test directory.
        for _ in range(max_backups + 2):
            set_up_test_dir()
            backup_file_or_directory('backup_test/file.txt', max_backups=max_backups)
        for suffix in range(1, max_backups + 1):
            assert os.path.exists(f'backup_test/file.txt.{suffix}')
        assert not os.path.exists(f'backup_test/file.txt.{max_backups + 1}')
    finally:
        # Run the cleanup even when an assertion fails so leftover fixtures cannot affect later tests.
        clean_up()


class TestDirLock(TestCase):
resources_folder = os.path.join(os.path.dirname(__file__), 'resources')

Expand Down
Loading