riix.eval
Utilities for evaluating rating systems.
"""Utilities for evaluating rating systems and searching their hyperparameters."""
import time
from functools import partial
from copy import deepcopy
from multiprocessing import Pool
import numpy as np
from riix.core.base import OnlineRatingSystem
from riix.utils.data_utils import TimedPairDataset
from riix.metrics import binary_metrics_suite


def evaluate(model: OnlineRatingSystem, dataset: TimedPairDataset, metrics_mask: np.ndarray = None):
    """Fit a rating system on a dataset and score its pre-match predictions.

    Args:
        model: rating system to evaluate; its ``fit_dataset`` method is called
            with ``return_pre_match_probs=True``.
        dataset: data to fit on; must support ``len()`` and expose an
            indexable ``outcomes`` attribute.
        metrics_mask: mask used to index both the predicted probabilities and
            ``dataset.outcomes`` before computing metrics. ``None`` (the
            default) selects every entry.

    Returns:
        The result of ``binary_metrics_suite`` on the selected probabilities
        and outcomes, with an added ``'duration'`` entry holding the elapsed
        wall-clock seconds spent fitting and masking.
    """
    start_time = time.time()
    if metrics_mask is None:
        # Qualify the dtype with ``np.``: a bare ``bool_`` is not defined in
        # this module and raised NameError on the default path.
        metrics_mask = np.ones(len(dataset), dtype=np.bool_)
    probs = model.fit_dataset(dataset, return_pre_match_probs=True)[metrics_mask]
    outcomes = dataset.outcomes[metrics_mask]
    # Metric computation is deliberately excluded from the timed section.
    duration = time.time() - start_time
    metrics = binary_metrics_suite(probs, outcomes)
    metrics['duration'] = duration
    return metrics


def eval_wrapper(params, rating_system_class, dataset, metrics_mask):
    """Build a model from one parameter configuration and evaluate it.

    ``params`` comes first so the remaining arguments can be bound with
    ``functools.partial`` and the result mapped over configurations.

    Args:
        params: keyword arguments forwarded to ``rating_system_class``.
        rating_system_class: callable constructing the model; it is also
            passed ``competitors=dataset.competitors``.
        dataset: dataset handed to ``evaluate``.
        metrics_mask: mask handed to ``evaluate``.

    Returns:
        Whatever ``evaluate`` returns for the constructed model.
    """
    model = rating_system_class(competitors=dataset.competitors, **params)
    return evaluate(model, dataset, metrics_mask)


def grid_search(
    rating_system_class,
    dataset,
    metrics_mask,
    param_configurations=None,
    metric='log_loss',
    minimize_metric=True,
    num_processes=None,
    return_all_metrics=False,
):
    """Evaluate every hyperparameter configuration and return the best one.

    Args:
        rating_system_class: callable constructing a rating system from
            ``competitors=...`` plus one configuration's keyword arguments.
        dataset: dataset each configuration is evaluated on.
        metrics_mask: mask forwarded to ``evaluate`` (``None`` selects all).
        param_configurations: iterable of keyword-argument dicts, one per
            candidate. ``None`` is treated as "no configurations".
        metric: key of the metric used to rank configurations.
        minimize_metric: if true the smallest metric value wins, otherwise
            the largest.
        num_processes: if truthy, evaluate in a ``multiprocessing.Pool`` with
            this many workers; otherwise evaluate serially in-process.
        return_all_metrics: if true, also return the per-configuration
            metrics in configuration order.

    Returns:
        ``(best_params, best_metrics)``, or
        ``(best_params, best_metrics, all_metrics)`` when
        ``return_all_metrics`` is true. Both "best" values are empty dicts
        when no configuration produced a comparable metric value.
    """
    # Materialize once: the configurations are walked twice (evaluation, then
    # selection), and a one-shot iterator would be exhausted by the first
    # pass, silently leaving nothing to select from.
    if param_configurations is None:
        param_configurations = []
    else:
        param_configurations = list(param_configurations)

    func = partial(
        eval_wrapper,
        rating_system_class=rating_system_class,
        dataset=dataset,
        metrics_mask=metrics_mask,
    )
    if num_processes:
        # The context manager releases the workers even if an evaluation
        # raises; ``pool.map`` has already collected every result by then.
        with Pool(num_processes) as pool:
            all_metrics = pool.map(func, param_configurations)
    else:
        all_metrics = [func(params) for params in param_configurations]

    best_params = {}
    best_metrics = {}
    # Flip the sign when maximizing so selection is always "smaller is better".
    metric_multiplier = 1.0 if minimize_metric else -1.0
    best_metric = np.inf
    for current_params, current_metrics in zip(param_configurations, all_metrics):
        current_metric = current_metrics[metric] * metric_multiplier
        if current_metric < best_metric:
            best_metric = current_metric
            best_metrics = deepcopy(current_metrics)
            best_params = current_params

    if not return_all_metrics:
        return best_params, best_metrics
    return best_params, best_metrics, all_metrics
def
evaluate( model: riix.core.base.OnlineRatingSystem, dataset: riix.utils.data_utils.TimedPairDataset, metrics_mask: numpy.ndarray = None):
def evaluate(model: OnlineRatingSystem, dataset: TimedPairDataset, metrics_mask: np.ndarray = None):
    """Fit a rating system on a dataset and score its pre-match predictions.

    Args:
        model: rating system to evaluate; its ``fit_dataset`` method is called
            with ``return_pre_match_probs=True``.
        dataset: data to fit on; must support ``len()`` and expose an
            indexable ``outcomes`` attribute.
        metrics_mask: mask used to index both the predicted probabilities and
            ``dataset.outcomes`` before computing metrics. ``None`` (the
            default) selects every entry.

    Returns:
        The result of ``binary_metrics_suite`` on the selected probabilities
        and outcomes, with an added ``'duration'`` entry holding the elapsed
        wall-clock seconds spent fitting and masking.
    """
    start_time = time.time()
    if metrics_mask is None:
        # Qualify the dtype with ``np.``: a bare ``bool_`` is not defined in
        # this module and raised NameError on the default path.
        metrics_mask = np.ones(len(dataset), dtype=np.bool_)
    probs = model.fit_dataset(dataset, return_pre_match_probs=True)[metrics_mask]
    outcomes = dataset.outcomes[metrics_mask]
    # Metric computation is deliberately excluded from the timed section.
    duration = time.time() - start_time
    metrics = binary_metrics_suite(probs, outcomes)
    metrics['duration'] = duration
    return metrics
Evaluate a rating system on a dataset.
def
eval_wrapper(params, rating_system_class, dataset, metrics_mask):
def
grid_search( rating_system_class, dataset, metrics_mask, param_configurations=None, metric='log_loss', minimize_metric=True, num_processes=None, return_all_metrics=False):
def grid_search(
    rating_system_class,
    dataset,
    metrics_mask,
    param_configurations=None,
    metric='log_loss',
    minimize_metric=True,
    num_processes=None,
    return_all_metrics=False,
):
    """Evaluate every hyperparameter configuration and return the best one.

    Args:
        rating_system_class: callable constructing a rating system; forwarded
            to ``eval_wrapper`` together with each configuration.
        dataset: dataset each configuration is evaluated on.
        metrics_mask: mask forwarded to ``eval_wrapper``.
        param_configurations: iterable of parameter configurations, one per
            candidate. ``None`` is treated as "no configurations".
        metric: key of the metric used to rank configurations.
        minimize_metric: if true the smallest metric value wins, otherwise
            the largest.
        num_processes: if truthy, evaluate in a ``multiprocessing.Pool`` with
            this many workers; otherwise evaluate serially in-process.
        return_all_metrics: if true, also return the per-configuration
            metrics in configuration order.

    Returns:
        ``(best_params, best_metrics)``, or
        ``(best_params, best_metrics, all_metrics)`` when
        ``return_all_metrics`` is true. Both "best" values are empty dicts
        when no configuration produced a comparable metric value.
    """
    # Materialize once: the configurations are walked twice (evaluation, then
    # selection), and a one-shot iterator would be exhausted by the first
    # pass, silently leaving nothing to select from.
    if param_configurations is None:
        param_configurations = []
    else:
        param_configurations = list(param_configurations)

    func = partial(
        eval_wrapper,
        rating_system_class=rating_system_class,
        dataset=dataset,
        metrics_mask=metrics_mask,
    )
    if num_processes:
        # The context manager releases the workers even if an evaluation
        # raises; ``pool.map`` has already collected every result by then.
        with Pool(num_processes) as pool:
            all_metrics = pool.map(func, param_configurations)
    else:
        all_metrics = [func(params) for params in param_configurations]

    best_params = {}
    best_metrics = {}
    # Flip the sign when maximizing so selection is always "smaller is better".
    metric_multiplier = 1.0 if minimize_metric else -1.0
    best_metric = np.inf
    for current_params, current_metrics in zip(param_configurations, all_metrics):
        current_metric = current_metrics[metric] * metric_multiplier
        if current_metric < best_metric:
            best_metric = current_metric
            best_metrics = deepcopy(current_metrics)
            best_params = current_params

    if not return_all_metrics:
        return best_params, best_metrics
    return best_params, best_metrics, all_metrics
Perform grid search and return the best hyperparameters.