# Source code for incerto.llm.verbalized

"""
Verbalized uncertainty for LLMs.

These methods use prompting to elicit uncertainty estimates directly
from the language model through natural language.
"""

from __future__ import annotations
import re


[docs] class VerbalizedConfidence: """ Ask the model to verbalize its confidence. Prompt: "How confident are you in this answer? (0-100%)" Extract and return the confidence score. """
[docs] @staticmethod def extract_percentage(response: str) -> float | None: """ Extract a percentage value from text. Args: response: Text containing a percentage Returns: Confidence value (0-1) or None if not found """ # Look for patterns like "85%", "85 percent", "0.85" patterns = [ r"(\d+(?:\.\d+)?)\s*%", # 85% r"(\d+(?:\.\d+)?)\s*percent", # 85 percent r"(\d+(?:\.\d+)?)\s*/\s*100", # 85/100 r"confidence.*?(\d+(?:\.\d+)?)", # confidence: 85 ] for pattern in patterns: match = re.search(pattern, response, re.IGNORECASE) if match: value = float(match.group(1)) # Normalize to 0-1 if value > 1.0: value = value / 100.0 return max(0.0, min(1.0, value)) return None
[docs] @staticmethod def get_confidence_prompt(question: str, answer: str) -> str: """ Generate a prompt to elicit confidence. Args: question: The original question answer: The model's answer Returns: Prompt string """ return f"""Question: {question} Your answer: {answer} On a scale from 0% to 100%, how confident are you that your answer is correct? Provide only the percentage value."""
[docs] class PTrue: """ P(True) - asking the model the probability its answer is correct. Prompt: "What is the probability that your answer is correct?" """
[docs] @staticmethod def get_ptrue_prompt(question: str, answer: str) -> str: """ Generate P(True) prompt. Args: question: The original question answer: The model's answer Returns: Prompt string """ return f"""Question: {question} Your answer: {answer} What is the probability (between 0.0 and 1.0) that your answer is correct? Provide only the numerical probability value."""
[docs] @staticmethod def extract_probability(response: str) -> float | None: """ Extract probability value from response. Args: response: Text containing a probability Returns: Probability value (0-1) or None """ # Look for decimal numbers between 0 and 1 patterns = [ r"(\d+\.\d+)", # 0.85 r"(\d+)%", # 85% r"(\d+)\s+percent", # 85 percent r"\b([01])\b", # standalone 0 or 1 ] for pattern in patterns: match = re.search(pattern, response, re.IGNORECASE) if match: value = float(match.group(1)) if value > 1.0: value = value / 100.0 return max(0.0, min(1.0, value)) return None
class SelfEvaluation:
    """
    Multi-turn self-critique for uncertainty.

    Ask the model to evaluate its own answer and look for inconsistencies.
    """

    @staticmethod
    def get_critique_prompt(question: str, answer: str) -> str:
        """
        Generate self-critique prompt.

        Args:
            question: The original question
            answer: The model's answer

        Returns:
            Prompt for self-evaluation
        """
        # Assemble the prompt from its three parts: context, checklist,
        # and the final request for a verbalized confidence score.
        context = f"Question: {question}\nProposed answer: {answer}"
        checklist = "\n".join(
            [
                "Please critically evaluate this answer. Consider:",
                "1. Is it factually accurate?",
                "2. Is it complete?",
                "3. Are there any potential errors or ambiguities?",
                "4. What is the likelihood this answer is correct?",
            ]
        )
        request = "Provide your evaluation and a confidence score (0-100%)."
        return f"{context}\n\n{checklist}\n\n{request}"
class BidirectionalConsistency:
    """
    Check consistency by asking the question in different ways.

    If the model gives different answers to equivalent questions,
    it indicates high uncertainty.
    """

    @staticmethod
    def paraphrase_prompts(question: str) -> list[str]:
        """
        Generate paraphrased versions of a question.

        Args:
            question: Original question

        Returns:
            List of paraphrased questions (original included first)
        """
        # Template-based rewording; a dedicated paraphrasing model would
        # be used in practice.
        lead_ins = [
            "",
            "Can you tell me: ",
            "What would you say about: ",
            "I'd like to know: ",
        ]
        return [lead_in + question for lead_in in lead_ins]

    @staticmethod
    def compute_consistency(answers: list[str]) -> float:
        """
        Compute consistency across answers.

        Args:
            answers: List of answers to paraphrased questions

        Returns:
            Consistency score (0-1), higher = more consistent
        """
        total = len(answers)
        # Fewer than two answers: nothing to disagree with.
        if total < 2:
            return 1.0
        # Exact string match: 1.0 when all answers are identical,
        # 0.0 when every answer is distinct.
        distinct = len(set(answers))
        return 1.0 - (distinct - 1) / (total - 1)