Module skplumber.metrics
Expand source code
import typing as t
import math
import pandas as pd
from sklearn.metrics import mean_squared_error as mse, mean_squared_log_error as msle
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from skplumber.consts import ProblemType, OptimizationDirection
class Metric:
def __init__(
self,
name: str,
problem_type: ProblemType,
compute: t.Callable[[pd.Series, pd.Series], float],
opt_dir: OptimizationDirection,
is_better_than: t.Callable[[float, float], bool],
best_value: float,
worst_value: float,
) -> None:
"""
Parameters
----------
name : str
The name of this metric e.g. "RMSE", or "Accuracy".
problem_type : ProblemType
The problem type this metric is used to asses e.g.
Regression for RMSE or Classification for Accuracy.
compute : function
The method that actually computes the score between the
truth `y` and the `predictions`.
opt_dir : OptimizationDirection
The direction an optimizer would go to improve this metric e.g.
for RMSE the goal is to minimize. For accuracy, the goal is to
maximize.
is_better_than : function
Should return `True` if the first arg (a) is better than the
second arg (b) in regards to this metric. E.g. if this metric were
RMSE, `a=25`, and second `b=30`, then this method would
return `True`.
best_value : float
The value a perfect model would get under this metric e.g. for
accuracy it would be 1.0.
worst_value : float
The worst value a model could achieve for this metric e.g. for
accuracy it would be 0.0.
"""
self.name = name
self.problem_type = problem_type
self._compute = compute
self.opt_dir = opt_dir
self.is_better_than = is_better_than
self.best_value = best_value
self.worst_value = worst_value
def __call__(self, y: pd.Series, predictions: pd.Series) -> float:
return self._compute(y, predictions)
rmse = Metric(
"Root Mean Squared Error",
ProblemType.REGRESSION,
lambda y, preds: math.sqrt(mse(y, preds)),
OptimizationDirection.MINIMIZE,
lambda a, b: a < b,
0.0,
float("inf"),
)
rmsle = Metric(
"Root Mean Squared Log Error",
ProblemType.REGRESSION,
lambda y, preds: math.sqrt(msle(y, preds)),
OptimizationDirection.MINIMIZE,
lambda a, b: a < b,
0.0,
float("inf"),
)
f1macro = Metric(
"F1 Macro",
ProblemType.CLASSIFICATION,
lambda y, preds: f1_score(y, preds, average="macro"),
OptimizationDirection.MAXIMIZE,
lambda a, b: a > b,
1.0,
0.0,
)
accuracy = Metric(
"Accuracy",
ProblemType.CLASSIFICATION,
accuracy_score,
OptimizationDirection.MAXIMIZE,
lambda a, b: a > b,
1.0,
0.0,
)
metrics: t.Dict[str, Metric] = {
"rmse": rmse,
"rmsle": rmsle,
"f1macro": f1macro,
"accuracy": accuracy,
}
default_metrics: t.Dict[ProblemType, Metric] = {
ProblemType.REGRESSION: rmse,
ProblemType.CLASSIFICATION: accuracy,
}
Classes
class Metric (name: str, problem_type: ProblemType, compute: Callable[[pandas.core.series.Series, pandas.core.series.Series], float], opt_dir: OptimizationDirection, is_better_than: Callable[[float, float], bool], best_value: float, worst_value: float)
-
Parameters
name
:str
- The name of this metric e.g. "RMSE", or "Accuracy".
problem_type
:ProblemType
- The problem type this metric is used to asses e.g. Regression for RMSE or Classification for Accuracy.
compute
:function
- The method that actually computes the score between the
truth
y
and thepredictions
. opt_dir
:OptimizationDirection
- The direction an optimizer would go to improve this metric e.g. for RMSE the goal is to minimize. For accuracy, the goal is to maximize.
is_better_than
:function
- Should return
True
if the first arg (a) is better than the second arg (b) in regards to this metric. E.g. if this metric were RMSE,a=25
, and secondb=30
, then this method would returnTrue
. best_value
:float
- The value a perfect model would get under this metric e.g. for accuracy it would be 1.0.
worst_value
:float
- The worst value a model could achieve for this metric e.g. for accuracy it would be 0.0.
Expand source code
class Metric: def __init__( self, name: str, problem_type: ProblemType, compute: t.Callable[[pd.Series, pd.Series], float], opt_dir: OptimizationDirection, is_better_than: t.Callable[[float, float], bool], best_value: float, worst_value: float, ) -> None: """ Parameters ---------- name : str The name of this metric e.g. "RMSE", or "Accuracy". problem_type : ProblemType The problem type this metric is used to asses e.g. Regression for RMSE or Classification for Accuracy. compute : function The method that actually computes the score between the truth `y` and the `predictions`. opt_dir : OptimizationDirection The direction an optimizer would go to improve this metric e.g. for RMSE the goal is to minimize. For accuracy, the goal is to maximize. is_better_than : function Should return `True` if the first arg (a) is better than the second arg (b) in regards to this metric. E.g. if this metric were RMSE, `a=25`, and second `b=30`, then this method would return `True`. best_value : float The value a perfect model would get under this metric e.g. for accuracy it would be 1.0. worst_value : float The worst value a model could achieve for this metric e.g. for accuracy it would be 0.0. """ self.name = name self.problem_type = problem_type self._compute = compute self.opt_dir = opt_dir self.is_better_than = is_better_than self.best_value = best_value self.worst_value = worst_value def __call__(self, y: pd.Series, predictions: pd.Series) -> float: return self._compute(y, predictions)