riix.eval

utilities for evaluating rating systems

 1"""utils for evaluating rating systems"""
 2import time
 3from functools import partial
 4from copy import deepcopy
 5from multiprocessing import Pool
 6import numpy as np
 7from riix.core.base import OnlineRatingSystem
 8from riix.utils.data_utils import TimedPairDataset
 9from riix.metrics import binary_metrics_suite
10
11
def evaluate(model: OnlineRatingSystem, dataset: TimedPairDataset, metrics_mask: np.ndarray = None):
    """Evaluate a rating system on a dataset.

    Fits the model over the full dataset, collects the pre-match win
    probabilities, and scores the masked subset against the observed outcomes.

    Args:
        model: the online rating system to fit and evaluate.
        dataset: the timed pairwise dataset to fit on.
        metrics_mask: optional boolean mask selecting which matchups are
            scored; defaults to scoring every matchup.

    Returns:
        dict of metrics from binary_metrics_suite, plus 'duration'
        (wall-clock seconds spent fitting and masking).
    """
    start_time = time.time()
    if metrics_mask is None:
        # score every matchup when no mask is provided
        # (was `dtype=bool_`, an undefined name — NameError at runtime)
        metrics_mask = np.ones(len(dataset), dtype=np.bool_)
    probs = model.fit_dataset(dataset, return_pre_match_probs=True)[metrics_mask]
    outcomes = dataset.outcomes[metrics_mask]
    # duration covers fitting + masking only, not metric computation
    duration = time.time() - start_time
    metrics = binary_metrics_suite(probs, outcomes)
    metrics['duration'] = duration
    return metrics
23
24
def eval_wrapper(params, rating_system_class, dataset, metrics_mask):
    """Instantiate `rating_system_class` with one hyperparameter
    configuration and return its evaluation metrics on `dataset`."""
    rating_system = rating_system_class(competitors=dataset.competitors, **params)
    return evaluate(rating_system, dataset, metrics_mask)
28
29
def grid_search(
    rating_system_class,
    dataset,
    metrics_mask,
    param_configurations=None,
    metric='log_loss',
    minimize_metric=True,
    num_processes=None,
    return_all_metrics=False,
):
    """Perform grid search and return the best hyperparameters.

    Args:
        rating_system_class: class to instantiate for each configuration.
        dataset: dataset passed through to evaluate().
        metrics_mask: boolean mask passed through to evaluate().
        param_configurations: iterable of hyperparameter dicts to try
            (required; kept keyword-optional for backward compatibility).
        metric: key of the metric used to rank configurations.
        minimize_metric: if True lower is better, otherwise higher is better.
        num_processes: if truthy, evaluate configurations in a process pool
            of this size; otherwise run serially.
        return_all_metrics: if True, also return the metrics of every
            configuration in input order.

    Returns:
        (best_params, best_metrics) or, when return_all_metrics is True,
        (best_params, best_metrics, all_metrics).
    """
    if param_configurations is None:
        # previously this fell through to `map(func, None)` and died with an
        # opaque TypeError; fail fast with an actionable message instead
        raise ValueError('param_configurations must be an iterable of parameter dicts')

    func = partial(
        eval_wrapper,
        rating_system_class=rating_system_class,
        dataset=dataset,
        metrics_mask=metrics_mask,
    )
    if num_processes:
        # context manager guarantees the pool is torn down even if a worker raises
        # (the original leaked the pool on error)
        with Pool(num_processes) as pool:
            all_metrics = pool.map(func, param_configurations)
    else:
        all_metrics = list(map(func, param_configurations))

    best_params = {}
    best_metrics = {}
    # negate the metric when maximizing so a single "smaller is better" compare works
    metric_multiplier = 1.0 if minimize_metric else -1.0
    best_metric = np.inf

    for current_params, current_metrics in zip(param_configurations, all_metrics):
        current_metric = current_metrics[metric]
        if current_metric * metric_multiplier < best_metric:
            best_metric = current_metric * metric_multiplier
            best_metrics = deepcopy(current_metrics)
            best_params = current_params

    if not return_all_metrics:
        return best_params, best_metrics
    return best_params, best_metrics, all_metrics
def evaluate(model: OnlineRatingSystem, dataset: TimedPairDataset, metrics_mask: np.ndarray = None):
    """Evaluate a rating system on a dataset.

    Fits the model over the full dataset, collects the pre-match win
    probabilities, and scores the masked subset against the observed outcomes.

    Args:
        model: the online rating system to fit and evaluate.
        dataset: the timed pairwise dataset to fit on.
        metrics_mask: optional boolean mask selecting which matchups are
            scored; defaults to scoring every matchup.

    Returns:
        dict of metrics from binary_metrics_suite, plus 'duration'
        (wall-clock seconds spent fitting and masking).
    """
    start_time = time.time()
    if metrics_mask is None:
        # score every matchup when no mask is provided
        # (was `dtype=bool_`, an undefined name — NameError at runtime)
        metrics_mask = np.ones(len(dataset), dtype=np.bool_)
    probs = model.fit_dataset(dataset, return_pre_match_probs=True)[metrics_mask]
    outcomes = dataset.outcomes[metrics_mask]
    # duration covers fitting + masking only, not metric computation
    duration = time.time() - start_time
    metrics = binary_metrics_suite(probs, outcomes)
    metrics['duration'] = duration
    return metrics

evaluate a rating system on a dataset

def eval_wrapper(params, rating_system_class, dataset, metrics_mask):
    """Instantiate `rating_system_class` with one hyperparameter
    configuration and return its evaluation metrics on `dataset`."""
    # original span contained two stacked `def` lines (extraction garbling),
    # which is a syntax error; collapsed to a single clean definition
    model = rating_system_class(competitors=dataset.competitors, **params)
    return evaluate(model, dataset, metrics_mask)