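"""Evaluate the predictions of an inference run.

The script syncs an inference run's predictions and config from WandB, evaluates the
predictions with the configured code evaluator(s), writes ``evaluation_output.jsonl``
to ``cfg.output_dir``, and uploads it back to WandB.

Example invocation (a sketch; the override names are taken from the config keys read
below and the exact values depend on your setup):

    python run_evaluation.py wandb_run_path=<entity/project/run_id> output_dir=<dir>
"""
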
# Imported for its side effects: registers the custom OmegaConf resolvers used in the configs
from src.utils import hydra_custom_resolvers  # noqa: F401

import os
from typing import List

import hydra
import pytorch_lightning as pl
import wandb
from omegaconf import DictConfig
from pytorch_lightning.loggers import Logger

from src import utils
from src.utils import evaluation_helpers, general_helpers

log = utils.get_pylogger(__name__)


def run_evaluation(cfg: DictConfig):
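    """Evaluate the predictions of the inference run referenced by ``cfg.wandb_run_path``.

    The run's data is synced from WandB, the predictions are evaluated with the
    instantiated code evaluator(s), and the resulting evaluation output is written
    to ``cfg.output_dir`` and uploaded to WandB.
    """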
    # Set seed for random number generators in PyTorch, Numpy and Python (random)
    if cfg.get("seed"):
        pl.seed_everything(cfg.seed, workers=True)

    # Initialize the loggers
    log.info("Instantiating loggers...")
    loggers: List[Logger] = general_helpers.instantiate_loggers(cfg.get("logger"))
    if loggers:
        log.info("Logging hyperparameters!")
        utils.log_hyperparameters(cfg, None, loggers)

    assert cfg.output_dir is not None, "Path to the directory in which the predictions will be written must be given"
    cfg.output_dir = os.path.relpath(cfg.output_dir)
    log.info(f"Output directory: {cfg.output_dir}")
    # Get the inference run's config &
    # sync the predictions and the results from WandB into the exp_dir (downloads the data if it is not found locally)
    api = wandb.Api()
    run = api.run(cfg.wandb_run_path)
    ir_wandb_config, ir_hydra_config, exp_dir = evaluation_helpers.sync_experiment_data(
        cfg.wandb_run_path, work_dir=cfg.work_dir
    )

    # Get the problem dataset (containing the metadata for the problems in the predictions dataset)
    problems_dataset = evaluation_helpers.get_dataset_used_in_run(ir_hydra_config, cfg.split_to_evaluate, cfg.data_dir)

    # Initialize the predictions dataset
    cfg.predictions_dataset.data_dir = general_helpers.get_predictions_dir_path(exp_dir)
    predictions_dataset = hydra.utils.instantiate(cfg.predictions_dataset, _recursive_=False)
    # Read the (potentially) existing evaluation output
    if cfg.local_results_cache_dir is not None:
        log.info(f"Loading evaluation output from local cache directory: {cfg.local_results_cache_dir}")
        evaluation_output = evaluation_helpers.read_evaluation_output(cfg.local_results_cache_dir)
    else:
        log.info(f"Loading evaluation output from the experiment directory: {exp_dir}")
        evaluation_output = evaluation_helpers.read_evaluation_output(exp_dir)

    if cfg.complete_override:
        log.info("Complete override is set to True. The (potentially) existing evaluation output will be overwritten.")
        evaluation_output = []
    # Keep only one id, for testing
    # pred_ids = [p["id"] for p in predictions_dataset]
    # _id = pred_ids[0]
    # problems_dataset = [p for p in problems_dataset if p["id"] == _id]
    # predictions_dataset = [p for p in predictions_dataset if p["id"] == _id]
    # evaluation_output = [
    #     {"id": _id, "online_judge": [
    #         {"evaluation_status": "completed", "compilation_status": True, "compilation_error_message": None,
    #          "hidden_tests_results": [{"status": True, "error_message": None} for _ in range(5)]},
    #         {"evaluation_status": "failed submission"},
    #     ]}
    # ]
    # Instantiate the code evaluator object(s)
    log.info("Instantiating the code evaluator(s)")
    code_evaluators = hydra.utils.instantiate(cfg.code_evaluator, _recursive_=True)

    # Evaluate the predictions
    for _, ce in code_evaluators.items():
        log.info(f"Evaluating {len(predictions_dataset)} predictions with {ce.name}.")
        evaluation_output = ce.evaluate_dataset(problems_dataset, predictions_dataset, evaluation_output, cfg.override)

    log.info("Writing the evaluation output to disk...")
    evaluation_helpers.write_evaluation_output(cfg.output_dir, evaluation_output)
    log.info(f"Output directory: {cfg.output_dir}")

    log.info("Uploading the evaluation output to WandB...")
    path_to_evaluation_output_file = os.path.join(cfg.output_dir, "evaluation_output.jsonl")
    general_helpers.upload_file_to_wandb(cfg.output_dir, path_to_evaluation_output_file)  # current run
    run.upload_file(path_to_evaluation_output_file, root=cfg.output_dir)  # original (inference) run


@hydra.main(version_base="1.2", config_path="configs", config_name="evaluation_root")
def main(hydra_config: DictConfig):
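    """Hydra entry point; delegates to ``run_evaluation`` via ``utils.run_task``."""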
    utils.run_task(hydra_config, run_evaluation)


if __name__ == "__main__":
    main()