riix.metrics
module for computing metrics for rating system experiments
1"""module for computing metrics for rating system experiments""" 2 3import numpy as np 4 5 6def binary_accuracy(probs: np.ndarray, outcomes: np.ndarray) -> float: 7 """compute accuracy where outcomes is binary ties count for half""" 8 pos_mask = probs > 0.5 9 neg_mask = probs < 0.5 10 draw_mask = probs == 0.5 11 correct = outcomes[pos_mask].sum() + (1.0 - outcomes[neg_mask]).sum() + 0.5 * draw_mask.sum() 12 return correct / probs.shape[0] 13 14def accuracy_without_draws(probs: np.ndarray, outcomes: np.ndarray) -> float: 15 """compute binary accuracy after first filtering out rows where the label is a draw""" 16 draw_mask = outcomes == 0.5 17 probs = probs[~draw_mask] 18 outcomes = outcomes[~draw_mask] 19 return binary_accuracy(probs, outcomes) 20 21 22def accuracy_with_draws(probs: np.ndarray, outcomes: np.ndarray, draw_margin=0.0) -> float: 23 """computes accuracy while allowing for ties""" 24 pos_pred_mask = probs > (0.5 + draw_margin) 25 neg_pred_mask = probs < (0.5 - draw_margin) 26 draw_pred_mask = np.abs(probs - draw_margin) <= draw_margin 27 correct = outcomes[pos_pred_mask].sum() 28 correct += (1.0 - outcomes)[neg_pred_mask].sum() 29 correct += 2 * outcomes[draw_pred_mask].sum() # lmao 30 return correct / outcomes.shape[0] 31 32 33def binary_log_loss(probs: np.ndarray, outcomes: np.ndarray, eps: float = 1e-6) -> float: 34 """compute log loss where outcome is binary 1.0 or 0.0""" 35 probs = np.clip(probs, eps, 1 - eps) 36 loss_array = -(np.log(probs) * outcomes) - (np.log(1.0 - probs) * (1.0 - outcomes)) 37 return loss_array.mean() 38 39 40def brier_score(probs: np.ndarray, outcomes: np.ndarray) -> float: 41 """compute the brier score, which is equivalent to the MSE""" 42 return np.square(probs - outcomes).mean() 43 44 45def binary_metrics_suite(probs: np.ndarray, outcomes: np.ndarray): 46 """a wrapper class for running a bunch of binary metrics""" 47 metrics = { 48 'accuracy': binary_accuracy(probs, outcomes), 49 'accuracy_without_draws' : 
accuracy_without_draws(probs, outcomes), 50 'log_loss': binary_log_loss(probs, outcomes), 51 'brier_score': brier_score(probs, outcomes), 52 } 53 return metrics
def binary_accuracy(probs: np.ndarray, outcomes: np.ndarray) -> float:
    """Accuracy for binary labels; a prediction of exactly 0.5 earns half credit.

    probs: predicted win probabilities in [0, 1]
    outcomes: realized labels (1.0 win, 0.0 loss)
    """
    predicted_win = probs > 0.5
    predicted_loss = probs < 0.5
    on_the_fence = probs == 0.5
    score = outcomes[predicted_win].sum()
    score += (1.0 - outcomes[predicted_loss]).sum()
    score += 0.5 * on_the_fence.sum()
    return score / probs.shape[0]
def accuracy_without_draws(probs: np.ndarray, outcomes: np.ndarray) -> float:
    """Binary accuracy restricted to decisive games (rows whose label is 0.5 are dropped)."""
    decisive = outcomes != 0.5
    return binary_accuracy(probs[decisive], outcomes[decisive])
def accuracy_with_draws(probs: np.ndarray, outcomes: np.ndarray, draw_margin=0.0) -> float:
    """Compute accuracy while allowing for ties.

    Probabilities within draw_margin of 0.5 count as a draw prediction,
    which is correct only when the label is 0.5 (a draw).

    probs: predicted win probabilities in [0, 1]
    outcomes: labels, 1.0 win / 0.0 loss / 0.5 draw
    draw_margin: half-width of the band around 0.5 treated as a draw prediction
    """
    pos_pred_mask = probs > (0.5 + draw_margin)
    neg_pred_mask = probs < (0.5 - draw_margin)
    # bug fix: the draw band is centered on 0.5, not on draw_margin — with the
    # original expression the default draw_margin=0.0 never flagged a 0.5 prediction
    draw_pred_mask = np.abs(probs - 0.5) <= draw_margin
    correct = outcomes[pos_pred_mask].sum()
    correct += (1.0 - outcomes)[neg_pred_mask].sum()
    # bug fix: one unit of credit per correctly predicted draw; the old
    # 2 * outcomes[...] awarded 2 credits when a win was predicted as a draw
    correct += (outcomes[draw_pred_mask] == 0.5).sum()
    return correct / outcomes.shape[0]
def binary_log_loss(probs: np.ndarray, outcomes: np.ndarray, eps: float = 1e-6) -> float:
    """Mean binary cross-entropy.

    probs: predicted probabilities, clipped to [eps, 1-eps] for numerical stability
    outcomes: labels, 1.0 or 0.0 (soft labels are also handled by the formula)
    eps: clipping floor/ceiling applied to probs before taking logs
    """
    clipped = np.clip(probs, eps, 1 - eps)
    losses = -(np.log(clipped) * outcomes) - (np.log(1.0 - clipped) * (1.0 - outcomes))
    return losses.mean()
def brier_score(probs: np.ndarray, outcomes: np.ndarray) -> float:
    """The Brier score: mean squared error between predicted probs and realized outcomes."""
    residuals = probs - outcomes
    return np.square(residuals).mean()
def binary_metrics_suite(probs: np.ndarray, outcomes: np.ndarray):
    """Run the standard binary metrics on one set of predictions; returns a dict keyed by metric name."""
    return {
        'accuracy': binary_accuracy(probs, outcomes),
        'accuracy_without_draws': accuracy_without_draws(probs, outcomes),
        'log_loss': binary_log_loss(probs, outcomes),
        'brier_score': brier_score(probs, outcomes),
    }