Source code for incerto.calibration.visual

import torch
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter1d

from .utils import get_bin_stats
from .metrics import _find_sigma_star



[docs]
def plot_reliability_diagram(
    logits: torch.Tensor,
    labels: torch.Tensor,
    n_bins: int = 10,
    ax=None,
    title: str = "Reliability Diagram",
):
    """
    Plot a reliability diagram comparing confidence vs accuracy.

    Draws the perfect-calibration diagonal, the per-bin empirical curve
    (bin confidence on x, bin accuracy on y) and one "gap" bar per bin that
    spans vertically from the bin's confidence to the bin's accuracy.

    Args:
        logits: Model logits; softmax is taken over dim 1.
        labels: True labels, compared with the argmax prediction.
        n_bins: Number of confidence bins passed to ``get_bin_stats``.
        ax: Optional matplotlib axes; a new figure is created when None.
        title: Plot title.

    Returns:
        The matplotlib Axes that was drawn on.
    """
    probs = F.softmax(logits, dim=1).cpu().detach().numpy()
    confidences = np.max(probs, axis=1)
    predictions = np.argmax(probs, axis=1)
    accuracies = (predictions == labels.cpu().numpy()).astype(float)

    # NOTE(review): assumes get_bin_stats returns per-bin arrays of length
    # n_bins (confidence, accuracy, weight) -- confirm against .utils.
    bin_conf, bin_acc, _ = get_bin_stats(confidences, accuracies, n_bins)

    if ax is None:
        _, ax = plt.subplots()

    # Perfect calibration
    ax.plot([0, 1], [0, 1], linestyle="--", label="Perfect")
    # Empirical
    ax.plot(bin_conf, bin_acc, marker="o", label="Empirical")
    # Gap bars: anchored at the bin confidence so that a negative gap
    # (accuracy below confidence) stays inside the [0, 1] y-range instead of
    # hanging below the axis where set_ylim would hide it.
    centers = (np.arange(n_bins) + 0.5) / n_bins
    ax.bar(
        centers,
        bin_acc - bin_conf,
        bottom=bin_conf,
        width=1.0 / n_bins,
        alpha=0.3,
        edgecolor="black",
        label="Gap",
    )

    ax.set_xlabel("Confidence")
    ax.set_ylabel("Accuracy")
    ax.set_title(title)
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.legend()

    return ax




[docs]
def plot_confidence_histogram(
    logits: torch.Tensor,
    n_bins: int = 10,
    ax=None,
    title: str = "Confidence Histogram",
):
    """
    Draw a histogram of the model's top-class softmax probabilities.

    Args:
        logits: Model logits; softmax is applied over dim 1.
        n_bins: Number of histogram bins over the range (0, 1).
        ax: Optional matplotlib axes; a new figure is created when None.
        title: Plot title.

    Returns:
        The axes that were drawn on.
    """
    softmax_out = F.softmax(logits, dim=1)
    # Confidence of a sample = its largest class probability.
    confidences = softmax_out.detach().cpu().numpy().max(axis=1)

    if ax is None:
        _, ax = plt.subplots()

    ax.hist(confidences, bins=n_bins, range=(0, 1), edgecolor="black")
    ax.set_title(title)
    ax.set_xlabel("Confidence")
    ax.set_ylabel("Count")

    return ax




[docs]
def plot_calibration_curve(
    logits: torch.Tensor,
    labels: torch.Tensor,
    n_bins: int = 10,
    ax=None,
    title: str = "Calibration Curve",
):
    """
    Plot per-bin accuracy against the centers of the confidence bins.

    Args:
        logits: Model logits; softmax is applied over dim 1.
        labels: True labels, compared with the argmax prediction.
        n_bins: Number of confidence bins passed to ``get_bin_stats``.
        ax: Optional matplotlib axes; a new figure is created when None.
        title: Plot title.

    Returns:
        The axes that were drawn on.
    """
    probabilities = F.softmax(logits, dim=1).cpu().detach().numpy()
    top_prob = np.max(probabilities, axis=1)
    top_class = np.argmax(probabilities, axis=1)
    is_correct = (top_class == labels.cpu().numpy()).astype(float)

    # Only the per-bin accuracy is plotted; the other two outputs are unused.
    _, bin_accuracy, _ = get_bin_stats(top_prob, is_correct, n_bins)
    bin_centers = (np.arange(n_bins) + 0.5) / n_bins

    if ax is None:
        _, ax = plt.subplots()

    ax.plot(bin_centers, bin_accuracy, marker="o")
    ax.set_title(title)
    ax.set_xlabel("Confidence Bin Center")
    ax.set_ylabel("Accuracy")
    ax.set_ylim(0, 1)

    return ax



def _smooth_calibration_curve(
    confidences: np.ndarray, accuracies: np.ndarray, sigma: float, n_grid: int = 1000
):
    """
    Compute the Nadaraya-Watson smoothed calibration curve.

    Predictions are histogrammed onto ``n_grid`` equal-width cells of [0, 1];
    the outcome-weighted histogram and the count histogram are each smoothed
    with a Gaussian kernel (reflected at the boundaries) and divided.

    Args:
        confidences: Predicted confidences; flattened before use. Values
            outside [0, 1] are clipped into the first/last grid cell.
        accuracies: Outcomes, one per confidence; flattened before use.
        sigma: Kernel bandwidth in confidence units. ``0`` disables
            smoothing and yields the raw per-cell ratio.
        n_grid: Number of grid cells.

    Returns:
        Tuple of (grid, r_sigma, density) where grid holds the cell centers,
        r_sigma is the smoothed P(Y=1 | f=t) (NaN where the smoothed density
        is at most 1e-10) and density is the kernel density of predictions.
        For empty input r_sigma is all NaN and density is all zero.

    Raises:
        ValueError: If ``sigma`` is negative.
    """
    if sigma < 0:
        raise ValueError(f"sigma must be non-negative, got {sigma}")

    confidences = np.asarray(confidences).ravel()
    accuracies = np.asarray(accuracies, dtype=float).ravel()
    n = confidences.size
    dx = 1.0 / n_grid
    grid = np.linspace(dx / 2, 1 - dx / 2, n_grid)

    # No samples: nothing to estimate (and 1 / n below would divide by zero).
    if n == 0:
        return grid, np.full_like(grid, np.nan), np.zeros_like(grid)

    bin_indices = np.clip((confidences / dx).astype(int), 0, n_grid - 1)

    # Histogram of y values (numerator of Nadaraya-Watson)
    y_hist = np.bincount(bin_indices, weights=accuracies, minlength=n_grid) / n

    # Histogram of counts (denominator / density)
    count_hist = np.bincount(bin_indices, minlength=n_grid) / n

    sigma_pixels = sigma / dx
    if sigma_pixels > 0:
        smoothed_y = gaussian_filter1d(y_hist, sigma_pixels, mode="reflect")
        smoothed_density = gaussian_filter1d(
            count_hist, sigma_pixels, mode="reflect"
        )
    else:
        # A zero-width kernel is the identity; gaussian_filter1d itself
        # cannot be called with sigma == 0.
        smoothed_y, smoothed_density = y_hist, count_hist

    # Leave the curve undefined where there is effectively no data.
    mask = smoothed_density > 1e-10
    r = np.full_like(grid, np.nan)
    r[mask] = smoothed_y[mask] / smoothed_density[mask]

    return grid, r, smoothed_density



[docs]
def plot_smooth_reliability_diagram(
    logits: torch.Tensor,
    labels: torch.Tensor,
    ax=None,
    title: str = "Smooth Reliability Diagram",
):
    """
    Plot a kernel-smoothed reliability diagram.

    Follows the SmoothECE framework: the calibration curve is a
    Nadaraya-Watson kernel regression whose bandwidth is chosen
    automatically by ``_find_sigma_star``. The curve is drawn thicker where
    the prediction density is higher, and a rug of tick marks below the axis
    shows the raw confidences.

    Reference:
        Blasiok & Nakkiran, "Smooth ECE: Principled Reliability Diagrams
        via Kernel Smoothing" (ICLR 2024)

    Args:
        logits: Model logits (N, C)
        labels: True labels (N,)
        ax: Optional matplotlib axes
        title: Plot title

    Returns:
        matplotlib Axes
    """
    probabilities = F.softmax(logits, dim=1).cpu().detach().numpy()
    confidences = np.max(probabilities, axis=1)
    predicted = np.argmax(probabilities, axis=1)
    accuracies = (predicted == labels.cpu().numpy()).astype(float)

    # Bandwidth selection followed by the smoothed curve on a regular grid.
    sigma_star = _find_sigma_star(confidences, accuracies)
    grid, curve, density = _smooth_calibration_curve(
        confidences, accuracies, sigma_star
    )

    if ax is None:
        _, ax = plt.subplots()

    # Reference diagonal for a perfectly calibrated model.
    ax.plot([0, 1], [0, 1], linestyle="--", color="gray", label="Perfect")

    # Density rescaled to [0, 1]; it drives the line width of each segment.
    peak = np.max(density)
    relative_density = density / peak if peak > 0 else np.ones_like(density)

    defined = ~np.isnan(curve)
    if np.any(defined):
        # A segment is drawn only when both of its end points are defined.
        drawable = defined[:-1] & defined[1:]
        for start in np.flatnonzero(drawable):
            segment = slice(start, start + 2)
            ax.plot(
                grid[segment],
                curve[segment],
                color="C3",
                linewidth=1.0 + 3.0 * relative_density[start],
                solid_capstyle="round",
            )
        # Empty line whose only purpose is to carry the legend entry.
        ax.plot([], [], color="C3", linewidth=2, label=f"smECE = {sigma_star:.4f}")

    # Rug of tick marks just below y = 0 showing where the raw data lies.
    rug_height = -0.02 * np.ones_like(confidences)
    ax.scatter(
        confidences,
        rug_height,
        marker="|",
        color="gray",
        alpha=0.3,
        s=10,
        zorder=1,
    )

    ax.set_title(title)
    ax.set_xlabel("Confidence")
    ax.set_ylabel("Accuracy")
    ax.set_xlim(0, 1)
    ax.set_ylim(-0.05, 1.05)
    ax.legend()

    return ax