Skip to content

Predictors

Module for LLM predictors.

get_predictor(downstream_llm=None, type='first_occurrence', *args, **kwargs)

Factory function to create and return a predictor instance.

This function supports three types of predictors: 1. DummyPredictor: A mock predictor for testing purposes when no downstream_llm is provided. 2. FirstOccurrenceClassificator: A predictor that classifies based on first occurrence of the label. 3. MarkerBasedClassificator: A predictor that classifies based on a marker.

Parameters:

Name Type Description Default
downstream_llm

The language model to use for prediction. If None, returns a DummyPredictor.

None
type Literal['first_occurrence', 'marker']

The type of predictor to create: - "first_occurrence" (default) for FirstOccurrenceClassificator - "marker" for MarkerBasedClassificator

'first_occurrence'
*args

Variable length argument list passed to the predictor constructor.

()
**kwargs

Arbitrary keyword arguments passed to the predictor constructor.

{}

Returns:

Type Description

An instance of DummyPredictor, FirstOccurrenceClassificator, or MarkerBasedClassificator.

Source code in promptolution/predictors/__init__.py
def get_predictor(
    downstream_llm=None, type: Literal["first_occurrence", "marker"] = "first_occurrence", *args, **kwargs
):
    """Factory function to create and return a predictor instance.

    This function supports three types of predictors:
    1. DummyPredictor: A mock predictor for testing purposes when no downstream_llm is provided.
    2. FirstOccurrenceClassificator: A predictor that classifies based on first occurrence of the label.
    3. MarkerBasedClassificator: A predictor that classifies based on a marker.

    Args:
        downstream_llm: The language model to use for prediction. If None, returns a DummyPredictor
            and `type` is not inspected.
        type (Literal["first_occurrence", "marker"]): The type of predictor to create:
                    - "first_occurrence" (default) for FirstOccurrenceClassificator
                    - "marker" for MarkerBasedClassificator
        *args: Variable length argument list passed to the predictor constructor.
        **kwargs: Arbitrary keyword arguments passed to the predictor constructor.

    Returns:
        An instance of DummyPredictor, FirstOccurrenceClassificator, or MarkerBasedClassificator.

    Raises:
        ValueError: If `downstream_llm` is given and `type` is not a supported predictor type.
    """
    if downstream_llm is None:
        # No LLM to wrap: hand back the mock predictor. The empty string only fills its
        # model_id slot; everything else (e.g. the class labels) comes from *args / **kwargs.
        return DummyPredictor("", *args, **kwargs)

    if type == "first_occurrence":
        return FirstOccurrenceClassificator(downstream_llm, *args, **kwargs)
    elif type == "marker":
        return MarkerBasedClassificator(downstream_llm, *args, **kwargs)
    else:
        raise ValueError(f"Invalid predictor type: '{type}'")

base_predictor

Base module for predictors.

BasePredictor

Abstract base class for predictors in the promptolution library.

This class defines the interface that all concrete predictor implementations should follow.

Attributes:

Name Type Description
llm

The language model used for generating predictions.

Methods:

Name Description
predict

An abstract method that should be implemented by subclasses to make predictions based on prompts and input data.

Source code in promptolution/predictors/base_predictor.py
class BasePredictor:
    """Base class for predictors in the promptolution library.

    This class defines the interface that all concrete predictor implementations should follow.

    Attributes:
        llm: The language model used for generating predictions.


    Methods:
        predict: Queries the language model with every prompt/input combination and
                 turns the responses into predictions via `_extract_preds`.
        _extract_preds: Hook that subclasses implement to map raw responses to predictions.
    """

    def __init__(self, llm: BaseLLM):
        """Initialize the BasePredictor.

        Args:
            llm: The language model to use for predictions.
        """
        self.llm = llm

    def predict(self, prompts: List[str], xs: np.ndarray, return_seq: bool = False) -> np.ndarray:
        """Make predictions for every combination of prompt and input.

        Args:
            prompts (List[str]): List of prompts to use for prediction. A single string is
                treated as a one-element list.
            xs (np.ndarray): Array of input data.
            return_seq (bool, optional): Whether to additionally return, for every prediction,
                the input followed by the raw LLM output.

        Returns:
            np.ndarray: Array of predictions with shape (len(prompts), len(xs)). If
            `return_seq` is True, a tuple `(preds, seqs)` is returned instead, where `seqs`
            has the same shape and holds the strings "<input>\\n<LLM output>".

        Raises:
            NotImplementedError: If the subclass does not implement `_extract_preds`.
        """
        if isinstance(prompts, str):
            prompts = [prompts]

        # One query per (prompt, input) pair, prompt-major, so the flat response list can be
        # reshaped into (n_prompts, n_samples) below.
        outputs = self.llm.get_response([prompt + "\n" + x for prompt in prompts for x in xs])
        preds = self._extract_preds(outputs)

        shape = (len(prompts), len(xs))
        outputs = np.array(outputs).reshape(shape)
        preds = preds.reshape(shape)
        xs = np.array(xs)

        if return_seq:
            seqs = np.array([[f"{x}\n{out}" for x, out in zip(xs, output)] for output in outputs])
            return preds, seqs

        return preds

    def _extract_preds(self, preds: List[str], shape: Tuple[int, int] = None) -> np.ndarray:
        """Extract predictions from the raw language model outputs.

        Subclasses must override this method. `predict` calls it with the raw outputs only.

        Args:
            preds: The raw predictions from the language model.
            shape: Unused by `predict`; kept optional for backward compatibility with the
                previous signature. The intended shape was (n_prompts, n_samples).

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

__init__(llm)

Initialize the BasePredictor.

Parameters:

Name Type Description Default
llm BaseLLM

The language model to use for predictions.

required
classes List[str]

The list of valid class labels. (Listed in the docstring only; the `__init__(llm)` signature does not accept this parameter.)

required
Source code in promptolution/predictors/base_predictor.py
def __init__(self, llm: BaseLLM):
    """Initialize the BasePredictor.

    Args:
        llm: The language model to use for predictions.
    """
    self.llm = llm

predict(prompts, xs, return_seq=False)

Make predictions based on prompts and input data. The method is implemented in the base class; subclasses supply `_extract_preds`.

Parameters:

Name Type Description Default
prompts List[str]

List of prompts to use for prediction.

required
xs ndarray

Array of input data.

required
return_seq bool

Whether to also return the generated sequences (each input followed by the LLM output).

False

Returns:

Type Description
ndarray

np.ndarray: Array of predictions.

Raises:

Type Description
NotImplementedError

If the subclass does not implement `_extract_preds`.

Source code in promptolution/predictors/base_predictor.py
def predict(self, prompts: List[str], xs: np.ndarray, return_seq: bool = False) -> np.ndarray:
    """Query the language model with every prompt/input pair and extract predictions.

    Args:
        prompts (List[str]): Prompts to evaluate; a single string is wrapped into a list.
        xs (np.ndarray): Inputs that are appended to each prompt, separated by a newline.
        return_seq (bool, optional): If True, also return the input/output sequences.

    Returns:
        np.ndarray: Predictions arranged as (len(prompts), len(xs)). With `return_seq=True`
        a tuple `(preds, seqs)` is returned, `seqs` holding "<input>\\n<LLM output>" strings
        in the same arrangement.

    Raises:
        NotImplementedError: Propagated from `self._extract_preds` when it is not overridden.
    """
    prompt_list = [prompts] if isinstance(prompts, str) else prompts

    # Prompt-major ordering: all inputs for the first prompt, then all for the second, ...
    queries = []
    for prompt in prompt_list:
        for x in xs:
            queries.append(prompt + "\n" + x)

    raw = self.llm.get_response(queries)
    flat_preds = self._extract_preds(raw)

    grid = (len(prompt_list), len(xs))
    raw_grid = np.array(raw).reshape(grid)
    pred_grid = flat_preds.reshape(grid)
    inputs = np.array(xs)

    if not return_seq:
        return pred_grid

    seqs = np.array([[f"{x}\n{out}" for x, out in zip(inputs, row)] for row in raw_grid])
    return pred_grid, seqs

DummyPredictor

Bases: BasePredictor

A dummy predictor implementation for testing purposes.

This predictor generates random predictions from the list of possible classes.

Attributes:

Name Type Description
model_id str

Always set to "dummy".

classes List[str]

List of possible class labels.

Methods:

Name Description
predict

Generates random predictions for the given prompts and input data.

Source code in promptolution/predictors/base_predictor.py
class DummyPredictor(BasePredictor):
    """A dummy predictor implementation for testing purposes.

    This predictor generates random predictions from the list of possible classes.

    Attributes:
        llm: Always None; no language model is used.
        model_id (str): Always set to "dummy".
        classes (List[str]): List of possible class labels.

    Methods:
        predict: Generates random predictions for the given prompts and input data.
    """

    def __init__(self, model_id, classes, *args, **kwargs):
        """Initialize the DummyPredictor.

        Args:
            model_id (str): Model identifier string. Accepted but not stored; `model_id`
                is always set to "dummy".
            classes (List[str]): List of possible class labels.
            *args: Accepted but unused.
            **kwargs: Accepted but unused.
        """
        # No language model backs this predictor; run the base initializer anyway so the
        # inherited `llm` attribute exists (as None) instead of being missing.
        super().__init__(None)
        self.model_id = "dummy"
        self.classes = classes

    def predict(
        self,
        prompts: List[str],
        xs: np.ndarray,
    ) -> np.ndarray:
        """Generate random predictions for the given prompts and input data.

        Args:
            prompts (List[str]): List of prompts (only the count is used). A single string
                is treated as one prompt, as in `BasePredictor.predict`.
            xs (np.ndarray): Array of input data (only the length is used).

        Returns:
            np.ndarray: 2D array of random predictions, shape (len(prompts), len(xs)).
        """
        # Without this, iterating a bare string would yield one row per character.
        if isinstance(prompts, str):
            prompts = [prompts]

        return np.array([np.random.choice(self.classes, len(xs)) for _ in prompts])

__init__(model_id, classes, *args, **kwargs)

Initialize the DummyPredictor.

Parameters

- model_id (str): Model identifier string.
- classes (list): List of possible class labels.

Source code in promptolution/predictors/base_predictor.py
def __init__(self, model_id, classes, *args, **kwargs):
    """Initialize the DummyPredictor.

    Args:
        model_id (str): Model identifier string. Accepted but not stored; the
            `model_id` attribute is always set to "dummy".
        classes (list): List of possible class labels.
        *args: Accepted but unused.
        **kwargs: Accepted but unused.
    """
    self.model_id = "dummy"
    self.classes = classes

predict(prompts, xs)

Generate random predictions for the given prompts and input data.

Parameters:

Name Type Description Default
prompts List[str]

List of prompts (ignored in this implementation).

required
xs ndarray

Array of input data (only the length is used).

required

Returns:

Type Description
ndarray

np.ndarray: 2D array of random predictions, shape (len(prompts), len(xs)).

Source code in promptolution/predictors/base_predictor.py
def predict(
    self,
    prompts: List[str],
    xs: np.ndarray,
) -> np.ndarray:
    """Generate random predictions for the given prompts and input data.

    Args:
        prompts (List[str]): List of prompts (only the count is used). A single string
            is treated as one prompt rather than being iterated character by character.
        xs (np.ndarray): Array of input data (only the length is used).

    Returns:
        np.ndarray: 2D array of random predictions, shape (len(prompts), len(xs)).
    """
    # A bare string would otherwise produce one row of predictions per character.
    if isinstance(prompts, str):
        prompts = [prompts]

    return np.array([np.random.choice(self.classes, len(xs)) for _ in prompts])

classificator

Module for classification predictors.

FirstOccurrenceClassificator

Bases: BasePredictor

A predictor class for classification tasks using language models.

This class takes a language model and a list of classes, and provides a method to predict classes for given prompts and input data. The class labels are extracted by matching the words in the prediction with the list of valid class labels. The first occurrence of a valid class label in the prediction is used as the predicted class. If no valid class label is found, the first class label in the list is used as the default prediction.

Attributes:

Name Type Description
llm

The language model used for generating predictions.

classes List[str]

The list of valid class labels.

Inherits from

BasePredictor: The base class for predictors in the promptolution library.

Source code in promptolution/predictors/classificator.py
class FirstOccurrenceClassificator(BasePredictor):
    """A predictor class for classification tasks using language models.

    This class takes a language model and a list of classes, and provides a method
    to predict classes for given prompts and input data. The class labels are extracted
    by matching the words in the prediction with the list of valid class labels.
    The first occurrence of a valid class label in the prediction is used as the predicted class.
    If no valid class label is found, the first class label in the list is used as the default prediction.

    Attributes:
        llm: The language model used for generating predictions.
        classes (List[str]): The list of valid class labels.
        extraction_description (str): Plain-text description of the task and extraction rule.

    Inherits from:
        BasePredictor: The base class for predictors in the promptolution library.
    """

    def __init__(self, llm, classes, *args, **kwargs):
        """Initialize the Classificator.

        Args:
            llm: The language model to use for predictions.
            classes (List[str]): The list of valid class labels. Must not contain uppercase characters.
            *args: Accepted but unused.
            **kwargs: Accepted but unused.

        Raises:
            AssertionError: If a class label contains uppercase characters.
        """
        super().__init__(llm)
        # Compare with the lowercased form rather than str.islower(): islower() is False for
        # labels without any cased character (e.g. "1"), which would reject valid labels.
        assert all(c == c.lower() for c in classes), "Class labels should be lowercase."
        self.classes = classes

        # The trailing space keeps the two sentences from running together.
        self.extraction_description = (
            f"The task is to classify the texts into one of those classes: {', '.join(classes)}. "
            "The first occurrence of a valid class label in the prediction is used as the predicted class."
        )

    def _extract_preds(self, preds: List[str]) -> np.ndarray:
        """Extract class labels from the predictions, based on the list of valid class labels.

        Each prediction is split on whitespace; every word is stripped of non-alphanumeric
        characters and lowercased before it is compared with the class labels.

        Args:
            preds: The raw predictions from the language model.

        Returns:
            np.ndarray: One class label per prediction; the first class label when no word matches.
        """
        response = []
        for pred in preds:
            predicted_class = self.classes[0]  # use first class as default pred
            for word in pred.split():
                # Drop punctuation such as quotes or trailing periods around the label.
                word = "".join(c for c in word if c.isalnum()).lower()
                if word in self.classes:
                    predicted_class = word
                    break

            response.append(predicted_class)

        return np.array(response)

__init__(llm, classes, *args, **kwargs)

Initialize the Classificator.

Parameters:

Name Type Description Default
llm

The language model to use for predictions.

required
classes List[str]

The list of valid class labels.

required
Source code in promptolution/predictors/classificator.py
def __init__(self, llm, classes, *args, **kwargs):
    """Initialize the Classificator.

    Args:
        llm: The language model to use for predictions.
        classes (List[str]): The list of valid class labels. Must not contain uppercase characters.
        *args: Accepted but unused.
        **kwargs: Accepted but unused.

    Raises:
        AssertionError: If a class label contains uppercase characters.
    """
    super().__init__(llm)
    # Compare with the lowercased form rather than str.islower(): islower() is False for
    # labels without any cased character (e.g. "1"), which would reject valid labels.
    assert all(c == c.lower() for c in classes), "Class labels should be lowercase."
    self.classes = classes

    # The trailing space keeps the two sentences from running together.
    self.extraction_description = (
        f"The task is to classify the texts into one of those classes: {', '.join(classes)}. "
        "The first occurrence of a valid class label in the prediction is used as the predicted class."
    )

MarkerBasedClassificator

Bases: BasePredictor

A predictor class for classification tasks using language models.

This class takes a language model and a list of classes, and provides a method to predict classes for given prompts and input data. The class labels are extracted from the text between a begin marker and an end marker in the model output.

Attributes:

Name Type Description
llm

The language model used for generating predictions.

classes List[str]

The list of valid class labels.

marker str

The marker to use for extracting the class label.

Inherits from

BasePredictor: The base class for predictors in the promptolution library.

Source code in promptolution/predictors/classificator.py
class MarkerBasedClassificator(BasePredictor):
    """A predictor class for classification tasks using language models.

    This class takes a language model and a list of classes, and provides a method
    to predict classes for given prompts and input data. The class labels are extracted
    from the text between a begin marker and an end marker in the model output.

    Attributes:
        llm: The language model used for generating predictions.
        classes (List[str]): The list of valid class labels, or None to accept any extracted text.
        begin_marker (str): The marker that precedes the class label.
        end_marker (str): The marker that follows the class label.
        extraction_description (str): Plain-text description of the task and extraction rule.

    Inherits from:
        BasePredictor: The base class for predictors in the promptolution library.
    """

    def __init__(self, llm, classes=None, begin_marker="<final_answer>", end_marker="</final_answer>", *args, **kwargs):
        """Initialize the Classificator.

        Args:
            llm: The language model to use for predictions.
            classes (List[str]): The list of valid class labels. If None, does not force any class.
            begin_marker (str): The marker that precedes the class label.
            end_marker (str): The marker that follows the class label.
            *args, **kwargs: Accepted but unused; they are not forwarded to the BasePredictor.

        Raises:
            AssertionError: If a class label contains uppercase characters.
        """
        super().__init__(llm)
        self.classes = classes
        self.begin_marker = begin_marker
        self.end_marker = end_marker

        if classes is not None:
            # Compare with the lowercased form rather than str.islower(): islower() is False for
            # labels without any cased character (e.g. "1"), which would reject valid labels.
            assert all(c == c.lower() for c in classes), "Class labels should be lowercase."

            # The trailing space keeps the two sentences from running together.
            self.extraction_description = (
                f"The task is to classify the texts into one of those classes: {','.join(classes)}. "
                f"The class label is extracted from the text that are between these markers: {begin_marker} and {end_marker}."
            )
        else:
            self.extraction_description = f"The class label is extracted from the text that are between these markers: {begin_marker} and {end_marker}."

    def _extract_preds(self, preds: List[str]) -> np.ndarray:
        """Extract class labels from the predictions, by extracting the text between the markers.

        The text after the last `begin_marker` and before the next `end_marker` is stripped and
        lowercased. A missing marker leaves that side of the text uncut.

        Args:
            preds: The raw predictions from the language model.

        Returns:
            np.ndarray: One extracted label per prediction. When `classes` is set, anything
            that is not a valid class label is replaced by the first class label.
        """
        response = []
        for pred in preds:
            pred = pred.split(self.begin_marker)[-1].split(self.end_marker)[0].strip().lower()
            if self.classes is not None and pred not in self.classes:
                pred = self.classes[0]  # fall back to the first class as default pred

            response.append(pred)

        return np.array(response)

__init__(llm, classes=None, begin_marker='<final_answer>', end_marker='</final_answer>', *args, **kwargs)

Initialize the Classificator.

Parameters:

Name Type Description Default
llm

The language model to use for predictions.

required
classes List[str]

The list of valid class labels. If None, does not force any class.

None
begin_marker str

The marker that precedes the class label in the model output.

'<final_answer>'
end_marker str

The marker that follows the class label in the model output.

'</final_answer>'
*args, **kwargs

Additional arguments. They are accepted but not forwarded to the BasePredictor, which is initialized with `llm` only.

required
Source code in promptolution/predictors/classificator.py
def __init__(self, llm, classes=None, begin_marker="<final_answer>", end_marker="</final_answer>", *args, **kwargs):
    """Initialize the Classificator.

    Args:
        llm: The language model to use for predictions.
        classes (List[str]): The list of valid class labels. If None, does not force any class.
        begin_marker (str): The marker that precedes the class label.
        end_marker (str): The marker that follows the class label.
        *args, **kwargs: Accepted but unused; they are not forwarded to the BasePredictor.

    Raises:
        AssertionError: If a class label contains uppercase characters.
    """
    super().__init__(llm)
    self.classes = classes
    self.begin_marker = begin_marker
    self.end_marker = end_marker

    if classes is not None:
        # Compare with the lowercased form rather than str.islower(): islower() is False for
        # labels without any cased character (e.g. "1"), which would reject valid labels.
        assert all(c == c.lower() for c in classes), "Class labels should be lowercase."

        # The trailing space keeps the two sentences from running together.
        self.extraction_description = (
            f"The task is to classify the texts into one of those classes: {','.join(classes)}. "
            f"The class label is extracted from the text that are between these markers: {begin_marker} and {end_marker}."
        )
    else:
        self.extraction_description = f"The class label is extracted from the text that are between these markers: {begin_marker} and {end_marker}."