-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocessResults.py
More file actions
152 lines (134 loc) · 6.02 KB
/
processResults.py
File metadata and controls
152 lines (134 loc) · 6.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import pathlib, os, json
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm import tqdm
def loadResults(xp_path):
    """Load all job results from an experimaestro experiment directory.

    Each sub-directory of ``xp_path`` is treated as one job. A job's
    ``params.json`` is mandatory; ``history.json``, evaluation files and
    inference files are optional and missing/corrupt ones are skipped
    with a message rather than aborting the whole load.

    xp_path: str or pathlib.Path, path to the jobs to load.
    Returns: pandas.DataFrame with one row per job.
    """
    # Path() is idempotent, so no need for an explicit type check
    xp_path = Path(xp_path)
    jobs = os.listdir(xp_path)
    results = []
    for job in tqdm(jobs):
        jobPath = xp_path / job
        job_data = {"path": jobPath, "hash": str(job)}
        with open(jobPath / "params.json") as json_file:
            params = json.load(json_file)
        # flatten the experimaestro parameter structure into job_data
        job_data.update(params["objects"][0]["fields"])
        # backward-compatible defaults for params added in later runs
        job_data.setdefault("with_context", False)
        job_data.setdefault("dataset_name", "WebNLG")
        job_data.setdefault("extraction_method", "after_context")
        # Optional training history. Note: the previous version assigned
        # an undefined `history` variable when loading failed (NameError);
        # now the key is simply omitted on failure.
        hist_path = jobPath / "history.json"
        if hist_path.exists():
            try:
                with open(hist_path) as json_file:
                    job_data["history"] = json.load(json_file)
            except (OSError, json.JSONDecodeError):
                print(f"error loading {hist_path}")
        files = os.listdir(jobPath)
        # Most recent evaluation file (lexicographic order, so timestamped
        # names sort chronologically)
        eval_files = sorted(f for f in files if "evaluation" in f.lower())
        if not eval_files:
            job_data["Eval"] = None
        else:
            eval_f = str(jobPath / eval_files[-1])
            try:
                with open(eval_f) as json_file:
                    job_data["Eval"] = json.load(json_file)
            except (OSError, json.JSONDecodeError):
                # corrupt eval file: record the failure explicitly instead
                # of leaving the "Eval" key absent for this row
                print(f"error loading {eval_f}")
                job_data["Eval"] = None
            job_data["date"] = os.path.getmtime(eval_f)
        # Most recent inference file (by modification time); stored as a
        # full path — consumers should use it as-is
        inf_files = [f for f in files if "inference" in f.lower()]
        if not inf_files:
            job_data["inference"] = None
        else:
            inf_files.sort(key=lambda f: os.path.getmtime(jobPath / f), reverse=True)
            job_data["inference"] = jobPath / inf_files[0]
        results.append(job_data)
    return pd.DataFrame(results)
def get_inference_res(results,
                      model_name,
                      layer,
                      dataset_name=None,
                      with_context = False,
                      extraction_method="in_context",
                      verbose=False):
    """Return the parsed inference JSON of the first job matching the filters.

    results: pandas.DataFrame as produced by loadResults.
    model_name / layer / with_context / extraction_method: exact-match filters.
    dataset_name: optional extra filter, applied only when truthy.
    verbose: print how many rows matched and which file is loaded.
    Returns: the decoded JSON object, or None when no row matches.
    """
    results = results[(results["model_name"] == model_name)
                      & (results["layer"] == layer)
                      & (results["with_context"] == with_context)
                      & (results["extraction_method"] == extraction_method)
                      ]
    if dataset_name:
        results = results[results["dataset_name"] == dataset_name]
    if verbose: print(f"found {len(results)} results for layer {layer} of {model_name} {'with' if with_context else 'without'} context {'on ' + dataset_name if dataset_name else ''} with method {extraction_method}.")
    if len(results) == 0:
        return None
    res = results.iloc[0].to_dict()
    # "inference" already holds the full file path (see loadResults); the
    # former `res["path"] / res["inference"]` join duplicated the job
    # directory whenever the experiment path was relative.
    inference_file = Path(res["inference"])
    if verbose: print(f"got inference files: {inference_file}")
    with open(inference_file) as json_file:
        inference = json.load(json_file)
    return inference
def get_taskVec(results, model_name, layer,
                dataset_name=None,
                with_context = False,
                extraction_method="in_context",
                verbose:bool=True):
    """Locate the task-vector checkpoint of the first matching evaluated job.

    Filters `results` on model/layer/context/extraction-method and keeps
    only rows with a non-null "Eval", then searches that job's directory
    for a ``*.pth`` file whose name contains "task".

    Returns the checkpoint path, None when no row matches, and raises
    FileNotFoundError when a row matches but holds no task-vector file.
    """
    mask = (
        (results["model_name"] == model_name)
        & (results["layer"] == layer)
        & (results["with_context"] == with_context)
        & (results["extraction_method"] == extraction_method)
        & (results["Eval"].notna())
    )
    filtered = results[mask]
    if dataset_name:
        filtered = filtered[filtered["dataset_name"] == dataset_name]
    if verbose:
        print(f"found {len(filtered)} jobs for layer {layer} of {model_name} {'with' if with_context else 'without'} context {'on ' + dataset_name if dataset_name else ''} with method {extraction_method}.")
    if len(filtered) == 0:
        return None
    first = filtered.iloc[0].to_dict()
    job_dir = first["path"]
    candidates = [
        name for name in os.listdir(job_dir)
        if name.endswith(".pth") and "task" in name.lower()
    ]
    if verbose:
        print(f"found {candidates} ")
    if not candidates:
        if verbose:
            print("No taskVec file found")
        raise FileNotFoundError(f"No taskVec file found in {first['path']}")
    return job_dir / candidates[0]
def get_linear_model(results, model_name, layer, dataset_name=None, with_context = False, verbose:bool=True):
    """Locate the linear-probe checkpoint of the first matching job.

    Filters `results` on model/layer/context (and optionally dataset),
    then searches the job directory for a ``*.pth`` file whose name
    contains "linear" or "extractor".

    Returns the checkpoint path, or None when no row or no file matches.
    """
    selection = results[
        (results["model_name"] == model_name)
        & (results["layer"] == layer)
        & (results["with_context"] == with_context)
    ]
    if dataset_name:
        selection = selection[selection["dataset_name"] == dataset_name]
    if verbose:
        print(f"found {len(selection)} jobs for layer {layer} of {model_name} {'with' if with_context else 'without'} context {'on ' + dataset_name if dataset_name else ''}")
    if len(selection) == 0:
        return None
    row = selection.iloc[0].to_dict()
    job_dir = row["path"]
    pth_files = [
        name
        for name in os.listdir(job_dir)
        if name.endswith(".pth") and ("linear" in name.lower() or "extractor" in name.lower())
    ]
    if verbose:
        print(f"found {pth_files} ")
    if not pth_files:
        if verbose:
            print("No linear model pth found")
        return None
    return job_dir / pth_files[0]