Source code for incerto.llm.generation

"""
Generation-specific uncertainty methods for LLMs.

These methods work with different generation strategies like
beam search, nucleus sampling, etc.
"""

from __future__ import annotations
import torch
from typing import List, Tuple


[docs] class BeamSearchUncertainty: """ Uncertainty estimation from beam search scores. Beam search maintains multiple hypotheses with scores. The score distribution indicates uncertainty. """
[docs] @staticmethod def compute_from_scores( beam_scores: torch.Tensor, temperature: float = 1.0, ) -> dict: """ Compute uncertainty from beam search scores. Args: beam_scores: Scores for each beam (num_beams,) temperature: Temperature for softmax (default: 1.0) Returns: Dictionary with: - entropy: Entropy over beam distribution - top_beam_prob: Probability of best beam - score_variance: Variance of beam scores """ # Convert scores to probabilities probs = torch.softmax(beam_scores / temperature, dim=0) # Entropy log_probs = torch.log(probs + 1e-10) entropy = -(probs * log_probs).sum() # Top beam probability top_beam_prob = probs.max().item() # Variance score_variance = beam_scores.var().item() return { "entropy": entropy.item(), "top_beam_prob": top_beam_prob, "score_variance": score_variance, "confidence": top_beam_prob, # Alias }
[docs] @staticmethod def diversity_among_beams( beam_sequences: List[List[int]], ) -> float: """ Measure diversity among beam search outputs. Args: beam_sequences: List of token ID sequences from beams Returns: Diversity score (0-1), higher = more diverse """ if len(beam_sequences) < 2: return 0.0 # Compute pairwise differences unique_count = len(set(tuple(seq) for seq in beam_sequences)) max_unique = len(beam_sequences) diversity = unique_count / max_unique return diversity
class NucleusSamplingUncertainty: """ Uncertainty for nucleus (top-p) sampling. Analyzes the probability mass distribution to determine how concentrated or spread out the generation is. """ @staticmethod def effective_vocabulary_size( logits: torch.Tensor, p: float = 0.9, ) -> int: """ Number of tokens needed to cover p probability mass. Args: logits: Token logits (..., vocab_size) p: Probability mass threshold (default: 0.9) Returns: Effective vocabulary size """ import torch.nn.functional as F probs = F.softmax(logits, dim=-1) sorted_probs, _ = torch.sort(probs, descending=True, dim=-1) cumsum = torch.cumsum(sorted_probs, dim=-1) nucleus_size = (cumsum <= p).sum(dim=-1) + 1 return nucleus_size.item() if nucleus_size.dim() == 0 else nucleus_size @staticmethod def probability_mass_concentration( logits: torch.Tensor, top_k: int = 10, ) -> float: """ Fraction of probability in top-k tokens. Args: logits: Token logits (..., vocab_size) top_k: Number of top tokens Returns: Probability mass in top-k (0-1) """ import torch.nn.functional as F probs = F.softmax(logits, dim=-1) top_k_probs, _ = torch.topk(probs, k=top_k, dim=-1) mass = top_k_probs.sum(dim=-1) return mass.item() if mass.dim() == 0 else mass
[docs] class IDontKnowDetection: """ Detect when the model is expressing uncertainty verbally. Common patterns: "I don't know", "I'm not sure", "unclear", etc. """ # Common uncertainty phrases UNCERTAINTY_PHRASES = [ "i don't know", "i'm not sure", "i am not sure", "uncertain", "unclear", "not certain", "cannot say", "can't say", "hard to say", "difficult to say", "no information", "cannot determine", "unable to determine", "ambiguous", "not enough information", "insufficient information", ]
[docs] @staticmethod def contains_uncertainty_phrase(text: str) -> bool: """ Check if text contains uncertainty phrases. Args: text: Generated text Returns: True if uncertainty phrase detected """ text_lower = text.lower() return any( phrase in text_lower for phrase in IDontKnowDetection.UNCERTAINTY_PHRASES )
[docs] @staticmethod def extract_confidence_from_hedging(text: str) -> dict: """ Detect hedging language and estimate confidence. Args: text: Generated text Returns: Dictionary with hedging indicators """ text_lower = text.lower() hedges = { "maybe": 0.5, "perhaps": 0.5, "possibly": 0.5, "might": 0.6, "could": 0.6, "probably": 0.7, "likely": 0.7, "seems": 0.6, "appears": 0.6, "suggest": 0.6, "indicate": 0.6, } found_hedges = [] min_confidence = 1.0 for hedge, confidence in hedges.items(): if hedge in text_lower: found_hedges.append(hedge) min_confidence = min(min_confidence, confidence) return { "hedges_found": found_hedges, "num_hedges": len(found_hedges), "estimated_confidence": min_confidence if found_hedges else 1.0, "contains_hedging": len(found_hedges) > 0, }
[docs] class ContrastiveDecoding: """ Uncertainty from contrastive decoding (comparing expert vs amateur models). Uses the difference in predictions between a strong and weak model to identify regions of high uncertainty. """
[docs] @staticmethod def compute_contrastive_score( expert_logits: torch.Tensor, amateur_logits: torch.Tensor, alpha: float = 0.5, ) -> torch.Tensor: """ Compute contrastive decoding score. Score = expert_prob - alpha * amateur_prob Args: expert_logits: Logits from expert/strong model amateur_logits: Logits from amateur/weak model alpha: Weight for amateur contribution Returns: Contrastive scores """ import torch.nn.functional as F expert_probs = F.softmax(expert_logits, dim=-1) amateur_probs = F.softmax(amateur_logits, dim=-1) contrastive_scores = expert_probs - alpha * amateur_probs return contrastive_scores
[docs] @staticmethod def disagreement_score( expert_logits: torch.Tensor, amateur_logits: torch.Tensor, ) -> torch.Tensor: """ Measure disagreement between expert and amateur. Args: expert_logits: Logits from expert model amateur_logits: Logits from amateur model Returns: Disagreement score (KL divergence) """ import torch.nn.functional as F expert_log_probs = F.log_softmax(expert_logits, dim=-1) amateur_probs = F.softmax(amateur_logits, dim=-1) # KL divergence: D_KL(amateur || expert) kl_div = F.kl_div(expert_log_probs, amateur_probs, reduction="none") disagreement = kl_div.sum(dim=-1) return disagreement