From 40393be84734db46915523aff365ad4aff077b7f Mon Sep 17 00:00:00 2001 From: Calvin Yeung Date: Tue, 17 Feb 2026 16:05:54 +0900 Subject: [PATCH 1/2] update --- .../event_data/soccer/soccer_event_class.py | 41 ++++++++++++++----- .../event_data/soccer/soccer_load_data.py | 10 +++-- .../event_data/soccer/soccer_processing.py | 3 +- pyproject.toml | 2 +- 4 files changed, 40 insertions(+), 16 deletions(-) diff --git a/preprocessing/sports/event_data/soccer/soccer_event_class.py b/preprocessing/sports/event_data/soccer/soccer_event_class.py index 4802274..d1e1e46 100644 --- a/preprocessing/sports/event_data/soccer/soccer_event_class.py +++ b/preprocessing/sports/event_data/soccer/soccer_event_class.py @@ -92,7 +92,7 @@ def load_data_single_file(self): if self.soccertrackv2 == True: df=soccer_load_data.load_soccertrack(self.event_path, self.tracking_path, self.meta_data, self.verbose) elif self.soccertrackv2 == False: - df=soccer_load_data.load_bepro(self.event_path, self.tracking_path, self.meta_data, self.verbose) + df=soccer_load_data.load_bepro(self.event_path, self.tracking_path, self.meta_data, self.match_id, self.verbose) else: raise ValueError('Data provider not supported or not found') return df @@ -100,7 +100,9 @@ def load_data_single_file(self): def load_data(self): print(f'Loading data from {self.data_provider}') #check if the event path is a single file or a directory - if ((self.event_path is not None and os.path.isfile(self.event_path)) and self.data_provider != 'statsbomb') or \ + if self.data_provider == 'bepro' and self.soccertrackv2 == False: + df = self.load_data_single_file() + elif ((self.event_path is not None and os.path.isfile(self.event_path)) and self.data_provider != 'statsbomb') or \ (self.data_provider == 'statsbomb' and self.statsbomb_match_id is None and os.path.isfile(self.event_path)) or \ (self.data_provider == 'statsbomb_skillcorner' and self.statsbomb_match_id is not None): df = self.load_data_single_file() @@ -585,12 +587,31 @@ def process_single_match(match_id): # df_datastadium.to_csv(os.getcwd()+"/test/sports/event_data/data/datastadium/preprocess_UIED_class_multi.csv",index=False) #test soccertrack - soccer_track_event_path="/data_pool_1/soccertrackv2/2024-03-18/Event/event.csv" - soccer_track_tracking_path="/data_pool_1/soccertrackv2/2024-03-18/Tracking/tracking.xml" - soccer_track_meta_path="/data_pool_1/soccertrackv2/2024-03-18/Tracking/meta.xml" - df_soccertrack=Soccer_event_data('bepro',soccer_track_event_path, - st_track_path = soccer_track_tracking_path, - st_meta_path = soccer_track_meta_path, - verbose = True).load_data() - df_soccertrack.to_csv(os.getcwd()+"/test/sports/event_data/data/soccertrack/test_load_soccer_event_class.csv",index=False) + # soccer_track_event_path="/data_pool_1/soccertrackv2/2024-03-18/Event/event.csv" + # soccer_track_tracking_path="/data_pool_1/soccertrackv2/2024-03-18/Tracking/tracking.xml" + # soccer_track_meta_path="/data_pool_1/soccertrackv2/2024-03-18/Tracking/meta.xml" + # df_soccertrack=Soccer_event_data('bepro',soccer_track_event_path, + # st_track_path = soccer_track_tracking_path, + # st_meta_path = soccer_track_meta_path, + # verbose = True).load_data() + # df_soccertrack.to_csv(os.getcwd()+"/test/sports/event_data/data/soccertrack/test_load_soccer_event_class.csv",index=False) + + #test bepro + data_dir=["/data_pool_1/soccertrackv2/117093/2023-11-25_筑波大学 vs 筑波大学 - B1_1st Half.json", + "/data_pool_1/soccertrackv2/117093/2023-11-25_筑波大学 vs 筑波大学 - B1_2nd Half.json"] + tracking_path="/data_pool_1/soccertrackv2/117093/tracker_box_data_125091.xml" + meta_data="/data_pool_1/soccertrackv2/117093/tracker_box_metadata_125091.xml" + + df_bepro=Soccer_event_data(data_provider='bepro', + event_path=data_dir, + tracking_path=tracking_path, + meta_data=meta_data, + preprocess_method="UIED", + max_workers=1, + match_id=117093, + verbose=True).preprocessing() + + #save + df_bepro.to_csv(os.getcwd()+"/test/sports/event_data/data/bepro/test_load_soccer_event_class.csv",index=False) + print("-----------done-----------") diff --git a/preprocessing/sports/event_data/soccer/soccer_load_data.py b/preprocessing/sports/event_data/soccer/soccer_load_data.py index da4c4d5..0596330 100644 --- a/preprocessing/sports/event_data/soccer/soccer_load_data.py +++ b/preprocessing/sports/event_data/soccer/soccer_load_data.py @@ -7,6 +7,7 @@ from statsbombpy import sb from tqdm import tqdm from datetime import datetime +from typing import Union import os import pdb @@ -1537,7 +1538,7 @@ def get_tracking_features(event_df, tracking_data, meta_data, verbose=True): return event_df -def load_bepro(event_path: str, tracking_path: str, meta_path: str, verbose: bool = False) -> pd.DataFrame: +def load_bepro(event_path: str, tracking_path: str, meta_path: str, match_id:Union[int, str], verbose: bool = False) -> pd.DataFrame: """ Loads and processes event and tracking data from soccer match recordings. @@ -1697,7 +1698,10 @@ def read_tracking_data(file_path): #load the event data - def get_additional_features(event_df, meta_data): + def get_additional_features(event_df, meta_data, match_id=None): + + event_df['match_id'] = match_id + #player info: id name nameEN shirtNumber position # create features period, seconds, event_type, event_type_2, outcome, home_team, x_unscaled, y_unscaled, period_dict = {1:"FIRST_HALF",2:"SECOND_HALF",3:"EXTRA_FIRST_HALF",4:"EXTRA_SECOND_HALF"} @@ -1902,7 +1906,7 @@ def get_tracking_features(event_df, tracking_data, meta_data, verbose=True): # Load the meta data meta_data = read_meta_data(meta_path) # Get additional features - event_df = get_additional_features(event_df, meta_data) + event_df = get_additional_features(event_df, meta_data, match_id=match_id) # Get tracking features event_df = get_tracking_features(event_df, tracking_data, meta_data, verbose=verbose) diff --git a/preprocessing/sports/event_data/soccer/soccer_processing.py b/preprocessing/sports/event_data/soccer/soccer_processing.py index 1e97266..1c3749b 100644 --- a/preprocessing/sports/event_data/soccer/soccer_processing.py +++ b/preprocessing/sports/event_data/soccer/soccer_processing.py @@ -2068,7 +2068,6 @@ def UIED_bepro(data): else: poss_id += 1 poss_id_list.append(poss_id) - poss_id+=1 df["poss_id"] = poss_id_list new_df = [] @@ -2166,7 +2165,7 @@ def UIED_bepro(data): tracking_col_home = [col for col in tracking_col_home if col != "home_team"] tracking_col_away = df.columns[df.columns.str.startswith("away_")].tolist() - df = df[['poss_id', 'team', 'home_team', 'action', 'success', 'goal', 'home_score', + df = df[['match_id', 'poss_id', 'team', 'home_team', 'action', 'success', 'goal', 'home_score', 'away_score', 'goal_diff', 'Period', 'Minute', 'Second', 'seconds', "delta_T", 'start_x', 'start_y', 'deltaX', 'deltaY', 'distance', 'dist2goal', 'angle2goal']+tracking_col_home+tracking_col_away] diff --git a/pyproject.toml b/pyproject.toml index e4bba31..5d85b9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" [project] name = "openstarlab_preprocessing" -version = "0.1.51" +version = "0.1.52" description = "openstarlab preprocessing package" readme = "README.md" requires-python = ">=3.8" From d8955d5468fee65f787cf93e18b155689885db51 Mon Sep 17 00:00:00 2001 From: Calvin Yeung Date: Tue, 17 Feb 2026 16:07:05 +0900 Subject: [PATCH 2/2] update --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5d85b9f..c270bc6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" [project] name = "openstarlab_preprocessing" -version = "0.1.52" +version = "0.1.53" description = "openstarlab preprocessing package" readme = "README.md" requires-python = ">=3.8"