Skip to content

Predictors

Module for LLM predictors.

base_predictor

Base module for predictors in the promptolution library.

BasePredictor

Bases: ABC

Abstract base class for predictors in the promptolution library.

This class defines the interface that all concrete predictor implementations should follow.

Attributes:

Name Type Description
llm

The language model used for generating predictions.

classes List[str]

The list of valid class labels.

config ExperimentConfig

Experiment configuration overwriting defaults

Source code in promptolution/predictors/base_predictor.py
class BasePredictor(ABC):
    """Abstract base class for predictors in the promptolution library.

    This class defines the interface that all concrete predictor implementations should follow.
    Subclasses implement `_extract_preds`; `predict` is concrete and shared.

    Attributes:
        llm: The language model used for generating predictions.
        classes (List[str]): The list of valid class labels.
        config (ExperimentConfig): Experiment configuration overwriting defaults
    """

    def __init__(self, llm: "BaseLLM", config: "ExperimentConfig" = None):
        """Initialize the predictor with a language model and configuration.

        Args:
            llm: Language model to use for prediction.
            config: Configuration for the predictor. When given, its `apply_to`
                method is called with this predictor.
        """
        self.llm = llm

        if config is not None:
            config.apply_to(self)

    def predict(
        self,
        prompts: List[str],
        xs: np.ndarray,
        system_prompts: List[str] = None,
        return_seq: bool = False,
    ) -> np.ndarray:
        """Make predictions based on prompts and input data.

        Each prompt is joined to its input with a newline, the resulting texts are sent
        to the language model, and the raw outputs are passed to `_extract_preds`.

        Args:
            prompts: List of prompts paired element-wise with `xs`, or a single prompt
                string, which is applied to every element of `xs`.
            xs: Array of input data.
            system_prompts: List of system prompts to use for the language model.
            return_seq: Whether to return the generating sequence.

        Returns:
            Array of predictions. If `return_seq` is True, a tuple `(preds, seqs)` where
            `seqs` is an array of "<input>\\n<raw model output>" strings.
        """
        if isinstance(prompts, str):
            # A lone prompt applies to every input. Wrapping it in a one-element list
            # would make zip() below silently drop all inputs after the first.
            prompts = [prompts] * len(xs)

        inputs = [prompt + "\n" + x for prompt, x in zip(prompts, xs)]
        outputs = self.llm.get_response(inputs, system_prompts=system_prompts)
        preds = self._extract_preds(outputs)

        if not return_seq:
            return preds

        seqs = np.array([f"{x}\n{out}" for x, out in zip(xs, outputs)])
        return preds, seqs

    @abstractmethod
    def _extract_preds(self, preds: List[str]) -> np.ndarray:
        """Extract class labels from the predictions, based on the list of valid class labels.

        Args:
            preds: The raw predictions from the language model.

        Returns:
            np.ndarray: Extracted predictions.
        """
        raise NotImplementedError

__init__(llm, config=None)

Initialize the predictor with a language model and configuration.

Parameters:

Name Type Description Default
llm BaseLLM

Language model to use for prediction.

required
config ExperimentConfig

Configuration for the predictor.

None
Source code in promptolution/predictors/base_predictor.py
def __init__(self, llm: "BaseLLM", config: "ExperimentConfig" = None):
    """Set up the predictor with its language model and an optional configuration.

    Args:
        llm: Language model to use for prediction.
        config: Configuration for the predictor. When given, its `apply_to`
            method is called with this predictor.
    """
    self.llm = llm

    # Without a configuration there is nothing further to apply.
    if config is None:
        return
    config.apply_to(self)

predict(prompts, xs, system_prompts=None, return_seq=False)

Make predictions based on prompts and input data.

Parameters:

Name Type Description Default
prompts List[str]

Prompt or list of prompts to use for prediction.

required
xs ndarray

Array of input data.

required
system_prompts List[str]

List of system prompts to use for the language model.

None
return_seq bool

Whether to return the generating sequence.

False

Returns:

Type Description
ndarray

Array of predictions, optionally with sequences.

Source code in promptolution/predictors/base_predictor.py
def predict(
    self,
    prompts: List[str],
    xs: np.ndarray,
    system_prompts: List[str] = None,
    return_seq: bool = False,
) -> np.ndarray:
    """Make predictions based on prompts and input data.

    Each prompt is joined to its input with a newline, the resulting texts are sent
    to the language model, and the raw outputs are passed to `_extract_preds`.

    Args:
        prompts: List of prompts paired element-wise with `xs`, or a single prompt
            string, which is applied to every element of `xs`.
        xs: Array of input data.
        system_prompts: List of system prompts to use for the language model.
        return_seq: Whether to return the generating sequence.

    Returns:
        Array of predictions. If `return_seq` is True, a tuple `(preds, seqs)` where
        `seqs` is an array of "<input>\\n<raw model output>" strings.
    """
    if isinstance(prompts, str):
        # A lone prompt applies to every input. Wrapping it in a one-element list
        # would make zip() below silently drop all inputs after the first.
        prompts = [prompts] * len(xs)

    inputs = [prompt + "\n" + x for prompt, x in zip(prompts, xs)]
    outputs = self.llm.get_response(inputs, system_prompts=system_prompts)
    preds = self._extract_preds(outputs)

    if not return_seq:
        return preds

    seqs = np.array([f"{x}\n{out}" for x, out in zip(xs, outputs)])
    return preds, seqs

classifier

Module for classification predictors.

FirstOccurrenceClassifier

Bases: BasePredictor

A predictor class for classification tasks using language models.

This class takes a language model and a list of classes, and provides a method to predict classes for given prompts and input data. The class labels are extracted by matching the words in the prediction with the list of valid class labels. The first occurrence of a valid class label in the prediction is used as the predicted class. If no valid class label is found, the first class label in the list is used as the default prediction.

Attributes:

Name Type Description
llm

The language model used for generating predictions.

classes List[str]

The list of valid class labels.

config ExperimentConfig

Configuration for the classifier, overriding defaults.

Inherits from

BasePredictor: The base class for predictors in the promptolution library.

Source code in promptolution/predictors/classifier.py
class FirstOccurrenceClassifier(BasePredictor):
    """A predictor class for classification tasks using language models.

    This class takes a language model and a list of classes, and provides a method
    to predict classes for given prompts and input data. The class labels are extracted
    by matching the words in the prediction with the list of valid class labels.
    The first occurrence of a valid class label in the prediction is used as the predicted class.
    If no valid class label is found, the first class label in the list is used as the default prediction.

    Attributes:
        llm: The language model used for generating predictions.
        classes (List[str]): The list of valid class labels.
        extraction_description (str): Human-readable description of how labels are extracted.
        config (ExperimentConfig, optional): Configuration for the classifier, overriding defaults.

    Inherits from:
        BasePredictor: The base class for predictors in the promptolution library.
    """

    def __init__(self, llm, classes, config: "ExperimentConfig" = None):
        """Initialize the FirstOccurrenceClassifier.

        Args:
            llm: The language model to use for predictions.
            classes (List[str]): The list of valid class labels. Each label must equal
                its own lowercase form.
            config (ExperimentConfig, optional): Configuration for the classifier, overriding defaults.

        Raises:
            AssertionError: If any class label contains uppercase characters.
        """
        # Compare against lower() instead of using islower(): islower() is False for
        # labels without any cased character (e.g. "1"), yet such labels match fine
        # during extraction, which only lowercases the model output.
        assert all(c == c.lower() for c in classes), "Class labels should be lowercase."
        self.classes = classes

        # The first literal ends with a space so the two sentences do not run together.
        self.extraction_description = (
            f"The task is to classify the texts into one of those classes: {', '.join(classes)}. "
            "The first occurrence of a valid class label in the prediction is used as the predicted class."
        )

        super().__init__(llm, config)

    def _extract_preds(self, preds: List[str]) -> np.ndarray:
        """Extract class labels from the predictions, based on the list of valid class labels.

        Args:
            preds: The raw predictions from the language model.

        Returns:
            np.ndarray: One label per prediction; the first class label is used when
            no word of a prediction matches a valid label.
        """
        response = []
        for pred in preds:
            predicted_class = self.classes[0]  # use first class as default pred
            for word in pred.split():
                # Strip punctuation and lowercase so e.g. "Positive." matches "positive".
                # NOTE(review): labels that themselves contain non-alphanumeric
                # characters can never match, because those characters are removed here.
                word = "".join([c for c in word if c.isalnum()]).lower()
                if word in self.classes:
                    predicted_class = word
                    break

            response.append(predicted_class)

        response = np.array(response)
        return response

__init__(llm, classes, config=None)

Initialize the FirstOccurrenceClassifier.

Parameters:

Name Type Description Default
llm

The language model to use for predictions.

required
classes List[str]

The list of valid class labels.

required
config ExperimentConfig

Configuration for the classifier, overriding defaults.

None
Source code in promptolution/predictors/classifier.py
def __init__(self, llm, classes, config: "ExperimentConfig" = None):
    """Initialize the FirstOccurrenceClassifier.

    Args:
        llm: The language model to use for predictions.
        classes (List[str]): The list of valid class labels. Each label must equal
            its own lowercase form.
        config (ExperimentConfig, optional): Configuration for the classifier, overriding defaults.

    Raises:
        AssertionError: If any class label contains uppercase characters.
    """
    # Compare against lower() instead of using islower(): islower() is False for
    # labels without any cased character (e.g. "1"), which are otherwise valid.
    assert all(c == c.lower() for c in classes), "Class labels should be lowercase."
    self.classes = classes

    # The first literal ends with a space so the two sentences do not run together.
    self.extraction_description = (
        f"The task is to classify the texts into one of those classes: {', '.join(classes)}. "
        "The first occurrence of a valid class label in the prediction is used as the predicted class."
    )

    super().__init__(llm, config)

MarkerBasedClassifier

Bases: BasePredictor

A predictor class for classification tasks using language models.

This class takes a language model and a list of classes, and provides a method to predict classes for given prompts and input data. The class labels are extracted from the text between a begin marker and an end marker in the model output.

Attributes:

Name Type Description
llm

The language model used for generating predictions.

classes List[str]

The list of valid class labels.

marker str

The marker to use for extracting the class label.

Inherits from

BasePredictor: The base class for predictors in the promptolution library.

Source code in promptolution/predictors/classifier.py
class MarkerBasedClassifier(BasePredictor):
    """A predictor class for classification tasks using language models.

    This class takes a language model and a list of classes, and provides a method
    to predict classes for given prompts and input data. The class labels are extracted
    from the text between a begin marker and an end marker in the model output.

    Attributes:
        llm: The language model used for generating predictions.
        classes (List[str]): The list of valid class labels, or None to accept any label.
        begin_marker (str): The marker that precedes the class label.
        end_marker (str): The marker that follows the class label.
        extraction_description (str): Human-readable description of how labels are extracted.

    Inherits from:
        BasePredictor: The base class for predictors in the promptolution library.
    """

    def __init__(
        self,
        llm,
        classes=None,
        begin_marker="<final_answer>",
        end_marker="</final_answer>",
        config: "ExperimentConfig" = None,
    ):
        """Initialize the MarkerBasedClassifier.

        Args:
            llm: The language model to use for predictions.
            classes (List[str]): The list of valid class labels. If None, does not force any class.
            begin_marker (str): The marker that precedes the class label.
            end_marker (str): The marker that follows the class label.
            config (ExperimentConfig, optional): Configuration for the classifier, overriding defaults.

        Raises:
            AssertionError: If any class label contains uppercase characters.
        """
        self.classes = classes
        self.begin_marker = begin_marker
        self.end_marker = end_marker

        if classes is not None:
            # Compare against lower() instead of using islower(): islower() is False
            # for labels without any cased character (e.g. "1"), which are otherwise valid.
            assert all(c == c.lower() for c in classes), "Class labels should be lowercase."

            # The first literal ends with a space so the two sentences do not run together.
            self.extraction_description = (
                f"The task is to classify the texts into one of those classes: {','.join(classes)}. "
                f"The class label is extracted from the text that are between these markers: {begin_marker} and {end_marker}."
            )
        else:
            self.extraction_description = f"The class label is extracted from the text that are between these markers: {begin_marker} and {end_marker}."

        super().__init__(llm, config)

    def _extract_preds(self, preds: List[str]) -> np.ndarray:
        """Extract class labels from the predictions, by extracting the text between the markers.

        Args:
            preds: The raw predictions from the language model.

        Returns:
            np.ndarray: One extracted label per prediction. When `classes` is set and the
            extracted text is not a valid label, the first class label is used instead.
        """
        response = []
        for pred in preds:
            # Take the text after the last begin marker and before the next end marker.
            # If a marker is absent, split() leaves the text on that side untouched.
            pred = pred.split(self.begin_marker)[-1].split(self.end_marker)[0].strip().lower()
            if self.classes is not None and pred not in self.classes:
                pred = self.classes[0]

            response.append(pred)

        response = np.array(response)
        return response

__init__(llm, classes=None, begin_marker='<final_answer>', end_marker='</final_answer>', config=None)

Initialize the MarkerBasedClassifier.

Parameters:

Name Type Description Default
llm

The language model to use for predictions.

required
classes List[str]

The list of valid class labels. If None, does not force any class.

None
begin_marker str

The marker that precedes the class label in the model output.

'<final_answer>'
end_marker str

The marker that follows the class label in the model output.

'</final_answer>'
config ExperimentConfig

Configuration for the classifier, overriding defaults.

None
Source code in promptolution/predictors/classifier.py
def __init__(
    self,
    llm,
    classes=None,
    begin_marker="<final_answer>",
    end_marker="</final_answer>",
    config: "ExperimentConfig" = None,
):
    """Initialize the MarkerBasedClassifier.

    Args:
        llm: The language model to use for predictions.
        classes (List[str]): The list of valid class labels. If None, does not force any class.
        begin_marker (str): The marker that precedes the class label.
        end_marker (str): The marker that follows the class label.
        config (ExperimentConfig, optional): Configuration for the classifier, overriding defaults.

    Raises:
        AssertionError: If any class label contains uppercase characters.
    """
    self.classes = classes
    self.begin_marker = begin_marker
    self.end_marker = end_marker

    if classes is not None:
        # Compare against lower() instead of using islower(): islower() is False
        # for labels without any cased character (e.g. "1"), which are otherwise valid.
        assert all(c == c.lower() for c in classes), "Class labels should be lowercase."

        # The first literal ends with a space so the two sentences do not run together.
        self.extraction_description = (
            f"The task is to classify the texts into one of those classes: {','.join(classes)}. "
            f"The class label is extracted from the text that are between these markers: {begin_marker} and {end_marker}."
        )
    else:
        self.extraction_description = f"The class label is extracted from the text that are between these markers: {begin_marker} and {end_marker}."

    super().__init__(llm, config)