EBIvariation · nitin-ebi · Apr 13, 2026 · Apr 7, 2026 · Apr 11, 2026
diff --git a/eva_sub_cli/file_utils.py b/eva_sub_cli/file_utils.py
@@ -1,7 +1,6 @@
 import glob
 import gzip
 import os
-import shutil
 import time
 from itertools import groupby
 
@@ -132,28 +131,6 @@ def _assess_vcf_evidence_type_manual(vcf_file):
         open_file.close()
 
 
-def backup_file_or_directory(file_name, max_backups=None):
-    """
-    Rename a file or directory by adding a '.1' at the end. If the '.1' file exists it move it to a '.2' and so on.
-    Keep at most the specified number of backups, if None will keep all.
-    """
-    suffix = 1
-    backup_name = f'{file_name}.{suffix}'
-    while os.path.exists(backup_name):
-        suffix += 1
-        backup_name = f'{file_name}.{suffix}'
-
-    for i in range(suffix, 1, -1):
-        if max_backups and i > max_backups:
-            if os.path.isfile(file_name):
-                os.remove(f'{file_name}.{i - 1}')
-            else:
-                shutil.rmtree(f'{file_name}.{i - 1}')
-        else:
-            os.rename(f'{file_name}.{i - 1}', f'{file_name}.{i}')
-    os.rename(file_name, file_name + '.1')
-
-
 def open_gzip_if_required(input_file, mode='r'):
     """Open a file in read mode using gzip if the file extension says .gz"""
     if input_file.endswith('.gz'):

diff --git a/eva_sub_cli/nextflow/validation.nf b/eva_sub_cli/nextflow/validation.nf
@@ -177,6 +177,7 @@ process check_vcf_valid {
     """
     trap 'if [[ \$? == 1 ]]; then exit 0; fi' EXIT
 
+    rm -rf ${output_dir}/vcf_format
     mkdir -p vcf_format
     $params.executable.vcf_validator -i $vcf -r text -o vcf_format --require-evidence > vcf_format/${vcf}.vcf_format.log 2>&1
     """
@@ -206,6 +207,7 @@ process check_vcf_reference {
     """
     trap 'if [[ \$? == 1 || \$? == 139 ]]; then exit 0; fi' EXIT
 
+    rm -rf ${output_dir}/assembly_check
     mkdir -p assembly_check
     $params.executable.vcf_assembly_checker -i $vcf -f $fasta $report_opt -r summary,text  -o assembly_check --require-genbank > assembly_check/${vcf}.assembly_check.log 2>&1
     """

diff --git a/eva_sub_cli/validators/validator.py b/eva_sub_cli/validators/validator.py
@@ -8,12 +8,12 @@
 import yaml
 from ebi_eva_common_pyutils.command_utils import run_command_with_output
 from ebi_eva_common_pyutils.config import WritableConfig
-from ebi_eva_common_pyutils.logger import logging_config, AppLogger
+from ebi_eva_common_pyutils.logger import AppLogger
 from packaging import version
 
 import eva_sub_cli
 from eva_sub_cli import ETC_DIR, SUB_CLI_CONFIG_FILE, __version__
-from eva_sub_cli.file_utils import backup_file_or_directory, resolve_single_file_path
+from eva_sub_cli.file_utils import resolve_single_file_path
 from eva_sub_cli.metadata import EvaMetadataJson
 from eva_sub_cli.report import generate_html_report, generate_text_report
 from eva_sub_cli.validators.validation_results_parsers import parse_assembly_check_log, parse_assembly_check_report, \
@@ -43,7 +43,6 @@
 PASS = 'pass'
 
 
-
 class Validator(AppLogger):
 
     def __init__(self, mapping_file, submission_dir, project_title=None, metadata_json=None, metadata_xlsx=None,
@@ -130,16 +129,15 @@ def _validate(self):
         raise NotImplementedError
 
     def set_up_output_dir(self):
-        if os.path.exists(self.output_dir):
-            backup_file_or_directory(self.output_dir, max_backups=9)
-        os.makedirs(self.output_dir, exist_ok=True)
+        """Create self.output_dir if it is missing; existing contents are left in place."""
+        os.makedirs(self.output_dir, exist_ok=True)
 
     def clean_up_output_dir(self):
         # Move intermediate validation outputs into a subdir except metadata.json
         subdir = os.path.join(self.output_dir, 'other_validations')
-        os.mkdir(subdir)
+        os.makedirs(subdir, exist_ok=True)
         for file_name in os.listdir(self.output_dir):
-            if file_name == 'metadata.json':
+            if file_name in ['metadata.json', 'report.txt', 'report.html']:
                 continue
             file_path = os.path.join(self.output_dir, file_name)
             if os.path.isfile(file_path):
@@ -210,7 +208,6 @@ def update_previous_version_results(self):
                 del self.results[SHALLOW_VALIDATION]['required']
             del self.results[SHALLOW_VALIDATION]['requested']
 
-
     def _collect_validation_workflow_results(self):
         # Collect information from the output and summarise in the config
         if self.shallow_validation:
@@ -550,7 +547,7 @@ def _collect_file_info_to_metadata(self):
                     file_path_2_file_size[vcf_file] = file_size
                     file_name_2_file_size[os.path.basename(vcf_file)] = file_size
         else:
-            error_txt =  f"Cannot locate file_info.txt at {os.path.join(self.output_dir, 'other_validations', 'file_info.txt')}"
+            error_txt = f"Cannot locate file_info.txt at {os.path.join(self.output_dir, 'other_validations', 'file_info.txt')}"
             self.error(error_txt)
             raise FileNotFoundError(error_txt)
 

diff --git a/tests/test_file_utils.py b/tests/test_file_utils.py
@@ -1,11 +1,10 @@
 import glob
 import os
 import shutil
-import time
 from pathlib import Path
 from unittest import TestCase
 
-from eva_sub_cli.file_utils import backup_file_or_directory, DirLock, DirLockError
+from eva_sub_cli.file_utils import DirLock, DirLockError
 
 
 def set_up_test_dir():
@@ -18,36 +17,6 @@ def clean_up():
         shutil.rmtree(file_name)
 
 
-def test_backup_file_or_directory():
-    set_up_test_dir()
-    backup_file_or_directory('backup_test')
-    assert not os.path.exists('backup_test')
-    assert os.path.exists('backup_test.1/nested/dir')
-    assert os.path.exists('backup_test.1/file.txt')
-    clean_up()
-
-
-def test_backup_file_or_directory_max_backups():
-    max_backups = 2
-
-    # Backup directory
-    for i in range(max_backups + 2):
-        set_up_test_dir()
-        backup_file_or_directory('backup_test', max_backups=max_backups)
-    for i in range(1, max_backups + 1):
-        assert os.path.exists(f'backup_test.{i}')
-    assert not os.path.exists(f'backup_test.{max_backups + 1}')
-
-    # Backup file
-    for i in range(max_backups + 2):
-        set_up_test_dir()
-        backup_file_or_directory('backup_test/file.txt', max_backups=max_backups)
-    for i in range(1, max_backups + 1):
-        assert os.path.exists(f'backup_test/file.txt.{i}')
-    assert not os.path.exists(f'backup_test/file.txt.{max_backups + 1}')
-    clean_up()
-
-
 class TestDirLock(TestCase):
     resources_folder = os.path.join(os.path.dirname(__file__), 'resources')