From 774dc6066d182b23d4ed2f7426e5c3624b941979 Mon Sep 17 00:00:00 2001 From: Akshat Garg Date: Thu, 5 Mar 2026 12:47:36 +0530 Subject: [PATCH 1/3] Implement SAR2RL preprocessing method Added support for SAR2RL preprocessing method and improved error handling for the preprocessed directory. --- .../SAR_data/soccer/soccer_SAR_class.py | 33 +++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/preprocessing/sports/SAR_data/soccer/soccer_SAR_class.py b/preprocessing/sports/SAR_data/soccer/soccer_SAR_class.py index 9f49a7d..54d11ad 100644 --- a/preprocessing/sports/SAR_data/soccer/soccer_SAR_class.py +++ b/preprocessing/sports/SAR_data/soccer/soccer_SAR_class.py @@ -14,6 +14,7 @@ """ import os +from pathlib import Path from tqdm import tqdm from concurrent.futures import ThreadPoolExecutor, as_completed @@ -343,15 +344,43 @@ def preprocess_data(self, cleaning_dir=None, preprocessed_dir=None): cleaning_dir, preprocessed_dir, ) - ) + ) # Collect results as they are completed for future in tqdm(as_completed(futures), total=len(futures)): future.result() else: raise ValueError(f"Preprocessing method not supported for {self.data_provider}") + elif self.preprocess_method == "SAR2RL": + if preprocessed_dir is None: + if self.data_provider == "datastadium": + preprocessed_dir = os.getcwd() + "/data/dss/preprocess_data" + elif self.data_provider == "statsbomb_skillcorner": + preprocessed_dir = os.getcwd() + "/data/stb_skc/preprocess_data" + elif self.data_provider == "fifawc": + preprocessed_dir = os.getcwd() + "/data/fifawc/preprocess_data" + else: + raise ValueError( + "preprocessed_dir is required for preprocess_method='SAR2RL' when no provider default exists." + ) + + sar_preprocessed_dir = Path(preprocessed_dir) + if not sar_preprocessed_dir.exists() or not sar_preprocessed_dir.is_dir(): + raise ValueError( + f"SAR2RL input directory not found: {sar_preprocessed_dir}. " + "Run SAR preprocessing first or pass a valid preprocessed_dir." + ) + + output_dir = sar_preprocessed_dir / "rl_dataset" + print("Starting SAR-to-RL dataset conversion...") + from .soccer_sar_to_rl_dataset import build_rl_datasets_from_sar_events + + build_rl_datasets_from_sar_events( + sar_preprocessed_dir=sar_preprocessed_dir, + output_dir=output_dir, + ) else: raise ValueError( - "Preprocessing method is not defined. Please set preprocess_method to 'SAR' or other valid methods." + "Preprocessing method is not defined. Please set preprocess_method to 'SAR' or 'SAR2RL'." ) print("Data preprocessing completed successfully!") From 00c44abc79fadcc2cb58d6b6985ab74c263c6740 Mon Sep 17 00:00:00 2001 From: Akshat Garg Date: Wed, 25 Mar 2026 18:29:51 +0530 Subject: [PATCH 2/3] Update SAR2RL preprocessing method logic Refactor SAR2RL preprocessing method to enforce data provider restrictions and improve error handling for preprocessed directory. --- .../SAR_data/soccer/soccer_SAR_class.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/preprocessing/sports/SAR_data/soccer/soccer_SAR_class.py b/preprocessing/sports/SAR_data/soccer/soccer_SAR_class.py index 54d11ad..ecf6515 100644 --- a/preprocessing/sports/SAR_data/soccer/soccer_SAR_class.py +++ b/preprocessing/sports/SAR_data/soccer/soccer_SAR_class.py @@ -1,3 +1,6 @@ +Full current contents of `[soccer_SAR_class.py](/Users/akshatgarg/Desktop/PreProcessing/preprocessing/sports/SAR_data/soccer/soccer_SAR_class.py)`: + +```python # Target data provider [Metrica,Robocup 2D simulation,Statsbomb,Wyscout,Opta data,DataFactory,sportec] """ @@ -351,17 +354,15 @@ def preprocess_data(self, cleaning_dir=None, preprocessed_dir=None): else: raise ValueError(f"Preprocessing method not supported for {self.data_provider}") elif self.preprocess_method == "SAR2RL": + if self.data_provider != "robocup_2d": + raise ValueError( + "SAR2RL preprocessing is only supported for data_provider='robocup_2d'." + ) + if preprocessed_dir is None: - if self.data_provider == "datastadium": - preprocessed_dir = os.getcwd() + "/data/dss/preprocess_data" - elif self.data_provider == "statsbomb_skillcorner": - preprocessed_dir = os.getcwd() + "/data/stb_skc/preprocess_data" - elif self.data_provider == "fifawc": - preprocessed_dir = os.getcwd() + "/data/fifawc/preprocess_data" - else: - raise ValueError( - "preprocessed_dir is required for preprocess_method='SAR2RL' when no provider default exists." - ) + raise ValueError( + "preprocessed_dir is required for preprocess_method='SAR2RL' and data_provider='robocup_2d'." + ) sar_preprocessed_dir = Path(preprocessed_dir) if not sar_preprocessed_dir.exists() or not sar_preprocessed_dir.is_dir(): From 2990f055e6abd6ba13cffd8ec7417f389b0c1276 Mon Sep 17 00:00:00 2001 From: Akshat Garg Date: Fri, 27 Mar 2026 12:35:43 +0530 Subject: [PATCH 3/3] Remove full current contents comment from soccer_SAR_class.py --- preprocessing/sports/SAR_data/soccer/soccer_SAR_class.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/preprocessing/sports/SAR_data/soccer/soccer_SAR_class.py b/preprocessing/sports/SAR_data/soccer/soccer_SAR_class.py index ecf6515..45ef56d 100644 --- a/preprocessing/sports/SAR_data/soccer/soccer_SAR_class.py +++ b/preprocessing/sports/SAR_data/soccer/soccer_SAR_class.py @@ -1,5 +1,3 @@ -Full current contents of `[soccer_SAR_class.py](/Users/akshatgarg/Desktop/PreProcessing/preprocessing/sports/SAR_data/soccer/soccer_SAR_class.py)`: - ```python # Target data provider [Metrica,Robocup 2D simulation,Statsbomb,Wyscout,Opta data,DataFactory,sportec]