riix.metrics

module for computing metrics for rating system experiments

 1"""module for computing metrics for rating system experiments"""
 2
 3import numpy as np
 4
 5
 6def binary_accuracy(probs: np.ndarray, outcomes: np.ndarray) -> float:
 7    """compute accuracy where outcomes is binary ties count for half"""
 8    pos_mask = probs > 0.5
 9    neg_mask = probs < 0.5
10    draw_mask = probs == 0.5
11    correct = outcomes[pos_mask].sum() + (1.0 - outcomes[neg_mask]).sum() + 0.5 * draw_mask.sum()
12    return correct / probs.shape[0]
13
def accuracy_without_draws(probs: np.ndarray, outcomes: np.ndarray) -> float:
    """Binary accuracy over the decisive rows only.

    Rows whose label is a draw (0.5) are dropped before scoring with
    binary_accuracy.
    """
    keep = outcomes != 0.5
    return binary_accuracy(probs[keep], outcomes[keep])
20
21
def accuracy_with_draws(probs: np.ndarray, outcomes: np.ndarray, draw_margin=0.0) -> float:
    """Three-way accuracy: probs within draw_margin of 0.5 predict a draw.

    probs: predicted win probabilities in [0, 1]
    outcomes: labels (1.0 win, 0.0 loss, 0.5 draw)
    draw_margin: half-width of the band around 0.5 treated as a predicted draw

    Returns the fraction of rows whose predicted class (win / loss / draw)
    matches the label; win/loss rows landing in the draw band score 0.
    """
    pos_pred_mask = probs > (0.5 + draw_margin)
    neg_pred_mask = probs < (0.5 - draw_margin)
    # BUG FIX: the draw band is centered on 0.5, not on draw_margin — the
    # old mask never matched a prob of exactly 0.5 at the default margin.
    draw_pred_mask = np.abs(probs - 0.5) <= draw_margin
    correct = outcomes[pos_pred_mask].sum()
    correct += (1.0 - outcomes)[neg_pred_mask].sum()
    # BUG FIX: a predicted draw is correct only when the label is 0.5; the
    # old `2 * outcomes` hack awarded double credit when the label was 1.0,
    # letting the "accuracy" exceed 1.
    correct += (outcomes[draw_pred_mask] == 0.5).sum()
    return correct / outcomes.shape[0]
31
32
def binary_log_loss(probs: np.ndarray, outcomes: np.ndarray, eps: float = 1e-6) -> float:
    """Mean binary cross-entropy.

    probs are clipped into [eps, 1 - eps] so that predictions of exactly
    0.0 or 1.0 cannot produce infinite loss.
    """
    clipped = np.clip(probs, eps, 1.0 - eps)
    per_row = outcomes * np.log(clipped) + (1.0 - outcomes) * np.log(1.0 - clipped)
    return -per_row.mean()
38
39
def brier_score(probs: np.ndarray, outcomes: np.ndarray) -> float:
    """Brier score: the mean squared error between probs and outcomes."""
    residuals = probs - outcomes
    return (residuals * residuals).mean()
43
44
def binary_metrics_suite(probs: np.ndarray, outcomes: np.ndarray):
    """Run the standard battery of binary metrics, keyed by metric name."""
    results = {}
    results['accuracy'] = binary_accuracy(probs, outcomes)
    results['accuracy_without_draws'] = accuracy_without_draws(probs, outcomes)
    results['log_loss'] = binary_log_loss(probs, outcomes)
    results['brier_score'] = brier_score(probs, outcomes)
    return results
def binary_accuracy(probs: numpy.ndarray, outcomes: numpy.ndarray) -> float:
 7def binary_accuracy(probs: np.ndarray, outcomes: np.ndarray) -> float:
 8    """compute accuracy where outcomes is binary ties count for half"""
 9    pos_mask = probs > 0.5
10    neg_mask = probs < 0.5
11    draw_mask = probs == 0.5
12    correct = outcomes[pos_mask].sum() + (1.0 - outcomes[neg_mask]).sum() + 0.5 * draw_mask.sum()
13    return correct / probs.shape[0]

Compute accuracy where outcomes are binary; ties (probability exactly 0.5) count for half credit.

def accuracy_without_draws(probs: numpy.ndarray, outcomes: numpy.ndarray) -> float:
15def accuracy_without_draws(probs: np.ndarray, outcomes: np.ndarray) -> float:
16    """compute binary accuracy after first filtering out rows where the label is a draw"""
17    draw_mask = outcomes == 0.5
18    probs = probs[~draw_mask]
19    outcomes = outcomes[~draw_mask]
20    return binary_accuracy(probs, outcomes)

compute binary accuracy after first filtering out rows where the label is a draw

def accuracy_with_draws(probs: numpy.ndarray, outcomes: numpy.ndarray, draw_margin=0.0) -> float:
23def accuracy_with_draws(probs: np.ndarray, outcomes: np.ndarray, draw_margin=0.0) -> float:
24    """computes accuracy while allowing for ties"""
25    pos_pred_mask = probs > (0.5 + draw_margin)
26    neg_pred_mask = probs < (0.5 - draw_margin)
27    draw_pred_mask = np.abs(probs - draw_margin) <= draw_margin
28    correct = outcomes[pos_pred_mask].sum()
29    correct += (1.0 - outcomes)[neg_pred_mask].sum()
30    correct += 2 * outcomes[draw_pred_mask].sum()  # lmao
31    return correct / outcomes.shape[0]

computes accuracy while allowing for ties

def binary_log_loss(probs: numpy.ndarray, outcomes: numpy.ndarray, eps: float = 1e-06) -> float:
34def binary_log_loss(probs: np.ndarray, outcomes: np.ndarray, eps: float = 1e-6) -> float:
35    """compute log loss where outcome is binary 1.0 or 0.0"""
36    probs = np.clip(probs, eps, 1 - eps)
37    loss_array = -(np.log(probs) * outcomes) - (np.log(1.0 - probs) * (1.0 - outcomes))
38    return loss_array.mean()

compute log loss where outcome is binary 1.0 or 0.0

def brier_score(probs: numpy.ndarray, outcomes: numpy.ndarray) -> float:
41def brier_score(probs: np.ndarray, outcomes: np.ndarray) -> float:
42    """compute the brier score, which is equivalent to the MSE"""
43    return np.square(probs - outcomes).mean()

compute the brier score, which is equivalent to the MSE

def binary_metrics_suite(probs: numpy.ndarray, outcomes: numpy.ndarray):
46def binary_metrics_suite(probs: np.ndarray, outcomes: np.ndarray):
47    """a wrapper class for running a bunch of binary metrics"""
48    metrics = {
49        'accuracy': binary_accuracy(probs, outcomes),
50        'accuracy_without_draws' : accuracy_without_draws(probs, outcomes),
51        'log_loss': binary_log_loss(probs, outcomes),
52        'brier_score': brier_score(probs, outcomes),
53    }
54    return metrics

A wrapper function that runs a collection of binary metrics and returns them in a dictionary.