Tasks

Module for task-related functions and classes.

base_task

Base module for tasks.

BaseTask

Bases: ABC

Abstract base class for tasks in the promptolution library.

This class defines the interface that all concrete task implementations should follow.

Methods:

    evaluate: An abstract method that should be implemented by subclasses to evaluate prompts using a given predictor.

Source code in promptolution/tasks/base_task.py
class BaseTask(ABC):
    """Abstract base class for tasks in the promptolution library.

    This class defines the interface that all concrete task implementations should follow.

    Methods:
        evaluate: An abstract method that should be implemented by subclasses
                  to evaluate prompts using a given predictor.
    """

    def __init__(self, *args, **kwargs):
        """Initialize the BaseTask."""
        pass

    @abstractmethod
    def evaluate(self, prompts: List[str], predictor) -> np.ndarray:
        """Abstract method to evaluate prompts using a given predictor.

        Args:
            prompts (List[str]): List of prompts to evaluate.
            predictor: The predictor to use for evaluation.

        Returns:
            np.ndarray: Array of evaluation scores for each prompt.

        Raises:
            NotImplementedError: If not implemented by a subclass.
        """
        raise NotImplementedError
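
A concrete task only needs to subclass BaseTask and implement evaluate. The ConstantScoreTask below is an illustrative sketch, not part of the library:

from typing import List

import numpy as np

from promptolution.tasks.base_task import BaseTask


class ConstantScoreTask(BaseTask):
    """Illustrative task that assigns the same fixed score to every prompt."""

    def __init__(self, score: float = 0.5):
        super().__init__()
        self.score = score

    def evaluate(self, prompts: List[str], predictor) -> np.ndarray:
        # The predictor is ignored here; a real task would run it on data
        # and compute one metric value per prompt.
        return np.full(len(prompts), self.score)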

__init__(*args, **kwargs)

Initialize the BaseTask.

Source code in promptolution/tasks/base_task.py
def __init__(self, *args, **kwargs):
    """Initialize the BaseTask."""
    pass

evaluate(prompts, predictor) abstractmethod

Abstract method to evaluate prompts using a given predictor.

Parameters:

    prompts (List[str]): List of prompts to evaluate. Required.
    predictor: The predictor to use for evaluation. Required.

Returns:

    np.ndarray: Array of evaluation scores for each prompt.

Raises:

    NotImplementedError: If not implemented by a subclass.

Source code in promptolution/tasks/base_task.py
@abstractmethod
def evaluate(self, prompts: List[str], predictor) -> np.ndarray:
    """Abstract method to evaluate prompts using a given predictor.

    Args:
        prompts (List[str]): List of prompts to evaluate.
        predictor: The predictor to use for evaluation.

    Returns:
        np.ndarray: Array of evaluation scores for each prompt.

    Raises:
        NotImplementedError: If not implemented by a subclass.
    """
    raise NotImplementedError

DummyTask

Bases: BaseTask

A dummy task implementation for testing purposes.

This task generates random evaluation scores for given prompts.

Attributes:

    initial_population (List[str]): List of initial prompts.
    description (str): Description of the dummy task.
    xs (np.ndarray): Array of dummy input data.
    ys (np.ndarray): Array of dummy labels.
    classes (List[str]): List of possible class labels.

Source code in promptolution/tasks/base_task.py
class DummyTask(BaseTask):
    """A dummy task implementation for testing purposes.

    This task generates random evaluation scores for given prompts.

    Attributes:
        initial_population (List[str]): List of initial prompts.
        description (str): Description of the dummy task.
        xs (np.ndarray): Array of dummy input data.
        ys (np.ndarray): Array of dummy labels.
        classes (List[str]): List of possible class labels.
    """

    def __init__(self):
        """Initialize the DummyTask."""
        self.initial_population = ["Some", "initial", "prompts", "that", "will", "do", "the", "trick"]
        self.description = "This is a dummy task for testing purposes."
        self.xs = np.array(["This is a test", "This is another test", "This is a third test"])
        self.ys = np.array(["positive", "negative", "positive"])
        self.classes = ["negative", "positive"]

    def evaluate(self, prompts: List[str], predictor) -> np.ndarray:
        """Generate random evaluation scores for the given prompts.

        Args:
            prompts (List[str]): List of prompts to evaluate.
            predictor: The predictor to use for evaluation (ignored in this implementation).

        Returns:
            np.ndarray: Array of random evaluation scores, one for each prompt.
        """
        return np.array([np.random.rand()] * len(prompts))
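
DummyTask needs no real data or predictor, which makes it handy for smoke tests. A quick usage sketch:

from promptolution.tasks.base_task import DummyTask

task = DummyTask()
scores = task.evaluate(["Classify the sentiment.", "Label the text."], predictor=None)
print(scores.shape)  # (2,) -- one entry per prompt, the same random value repeated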

__init__()

Initialize the DummyTask.

Source code in promptolution/tasks/base_task.py
def __init__(self):
    """Initialize the DummyTask."""
    self.initial_population = ["Some", "initial", "prompts", "that", "will", "do", "the", "trick"]
    self.description = "This is a dummy task for testing purposes."
    self.xs = np.array(["This is a test", "This is another test", "This is a third test"])
    self.ys = np.array(["positive", "negative", "positive"])
    self.classes = ["negative", "positive"]

evaluate(prompts, predictor)

Generate random evaluation scores for the given prompts.

Parameters:

    prompts (List[str]): List of prompts to evaluate. Required.
    predictor: The predictor to use for evaluation (ignored in this implementation). Required.

Returns:

    np.ndarray: Array of random evaluation scores, one for each prompt.

Source code in promptolution/tasks/base_task.py
def evaluate(self, prompts: List[str], predictor) -> np.ndarray:
    """Generate random evaluation scores for the given prompts.

    Args:
        prompts (List[str]): List of prompts to evaluate.
        predictor: The predictor to use for evaluation (ignored in this implementation).

    Returns:
        np.ndarray: Array of random evaluation scores, one for each prompt.
    """
    return np.array([np.random.rand()] * len(prompts))

classification_tasks

Module for classification tasks.

ClassificationTask

Bases: BaseTask

A class representing a classification task in the promptolution library.

This class handles the loading and management of classification datasets, as well as the evaluation of predictors on these datasets.

Attributes:

    description (str): Description of the task.
    classes (List[str]): List of possible class labels.
    xs (np.ndarray): Array of input data.
    ys (np.ndarray): Array of labels.
    initial_prompts (List[str]): Initial set of prompts to start optimization with.
    metric (Callable): Metric to use for evaluation.

Inherits from:

BaseTask: The base class for tasks in the promptolution library.

Source code in promptolution/tasks/classification_tasks.py
class ClassificationTask(BaseTask):
    """A class representing a classification task in the promptolution library.

    This class handles the loading and management of classification datasets,
    as well as the evaluation of predictors on these datasets.

    Attributes:
        description (str): Description of the task.
        classes (List[str]): List of possible class labels.
        xs (np.ndarray): Array of input data.
        ys (np.ndarray): Array of labels.
        initial_prompts (List[str]): Initial set of prompts to start optimization with.
        metric (Callable): Metric to use for evaluation.

    Inherits from:
        BaseTask: The base class for tasks in the promptolution library.
    """

    def __init__(
        self,
        df: pd.DataFrame,
        description: str,
        initial_prompts: List[str] = None,
        x_column: str = "x",
        y_column: str = "y",
        metric: Callable = accuracy_score,
    ):
        """Initialize the ClassificationTask from a pandas DataFrame.

        Args:
            df (pd.DataFrame): Input DataFrame containing the data
            description (str): Description of the task
            initial_prompts (List[str], optional): Initial set of prompts to start optimization with. Defaults to None.
            x_column (str, optional): Name of the column containing input texts. Defaults to "x".
            y_column (str, optional): Name of the column containing labels. Defaults to "y".
            metric (Callable, optional): Metric to use for evaluation. Defaults to accuracy_score.
        """
        super().__init__()
        self.description = description
        self.initial_prompts = initial_prompts
        self.metric = metric

        df[y_column] = df[y_column].str.lower()
        self.classes = df[y_column].unique()

        self.xs = df[x_column].values
        self.ys = df[y_column].values

    def evaluate(
        self,
        prompts: List[str],
        predictor: BasePredictor,
        n_samples: int = 20,
        subsample: bool = False,
        return_seq: bool = False,
    ) -> np.ndarray:
        """Evaluate a set of prompts using a given predictor.

        Args:
            prompts (List[str]): List of prompts to evaluate.
            predictor (BasePredictor): Predictor to use for evaluation.
            n_samples (int, optional): Number of samples to use if subsampling. Defaults to 20.
            subsample (bool, optional): Whether to use subsampling. If set to True,
                a different subset is sampled per call. Defaults to False.
            return_seq (bool, optional): Whether to also return the generated sequences. Defaults to False.

        Returns:
            np.ndarray: Array of scores (one per prompt), computed with the task's metric.
                If return_seq is True, a tuple of (scores, sequences) is returned instead.
        """
        if isinstance(prompts, str):
            prompts = [prompts]
        # Randomly select a subsample of n_samples
        if subsample:
            indices = np.random.choice(len(self.xs), n_samples, replace=False)
        else:
            indices = np.arange(len(self.xs))

        xs_subsample = self.xs[indices]
        ys_subsample = self.ys[indices]

        # Make predictions on the subsample
        preds = predictor.predict(prompts, xs_subsample, return_seq=return_seq)

        if return_seq:
            preds, seqs = preds

        scores = np.array([self.metric(ys_subsample, pred) for pred in preds])

        if return_seq:
            return scores, seqs

        return scores
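
For illustration, a task built from a small in-memory DataFrame; the column values below are made up, and predictor stands in for any BasePredictor implementation:

import pandas as pd

from promptolution.tasks.classification_tasks import ClassificationTask

df = pd.DataFrame(
    {
        "x": ["great movie", "terrible plot", "loved it"],
        "y": ["Positive", "Negative", "Positive"],
    }
)

task = ClassificationTask(
    df,
    description="Classify movie reviews as positive or negative.",
)

print(task.classes)  # ['positive' 'negative'] -- labels are lowercased on load

# scores = task.evaluate(prompts, predictor)  # predictor: any BasePredictor instance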

__init__(df, description, initial_prompts=None, x_column='x', y_column='y', metric=accuracy_score)

Initialize the ClassificationTask from a pandas DataFrame.

Parameters:

    df (pd.DataFrame): Input DataFrame containing the data. Required.
    description (str): Description of the task. Required.
    initial_prompts (List[str], optional): Initial set of prompts to start optimization with. Defaults to None.
    x_column (str, optional): Name of the column containing input texts. Defaults to "x".
    y_column (str, optional): Name of the column containing labels. Defaults to "y".
    metric (Callable, optional): Metric to use for evaluation. Defaults to accuracy_score.

Source code in promptolution/tasks/classification_tasks.py
def __init__(
    self,
    df: pd.DataFrame,
    description: str,
    initial_prompts: List[str] = None,
    x_column: str = "x",
    y_column: str = "y",
    metric: Callable = accuracy_score,
):
    """Initialize the ClassificationTask from a pandas DataFrame.

    Args:
        df (pd.DataFrame): Input DataFrame containing the data
        description (str): Description of the task
        initial_prompts (List[str], optional): Initial set of prompts to start optimization with. Defaults to None.
        x_column (str, optional): Name of the column containing input texts. Defaults to "x".
        y_column (str, optional): Name of the column containing labels. Defaults to "y".
        metric (Callable, optional): Metric to use for evaluation. Defaults to accuracy_score.
    """
    super().__init__()
    self.description = description
    self.initial_prompts = initial_prompts
    self.metric = metric

    df[y_column] = df[y_column].str.lower()
    self.classes = df[y_column].unique()

    self.xs = df[x_column].values
    self.ys = df[y_column].values

evaluate(prompts, predictor, n_samples=20, subsample=False, return_seq=False)

Evaluate a set of prompts using a given predictor.

Parameters:

    prompts (List[str]): List of prompts to evaluate. Required.
    predictor (BasePredictor): Predictor to use for evaluation. Required.
    n_samples (int, optional): Number of samples to use if subsampling. Defaults to 20.
    subsample (bool, optional): Whether to use subsampling. If set to True, a different subset is sampled per call. Defaults to False.
    return_seq (bool, optional): Whether to also return the generated sequences. Defaults to False.

Returns:

    np.ndarray: Array of scores (one per prompt), computed with the task's metric. If return_seq is True, a tuple of (scores, sequences) is returned instead.

Source code in promptolution/tasks/classification_tasks.py
def evaluate(
    self,
    prompts: List[str],
    predictor: BasePredictor,
    n_samples: int = 20,
    subsample: bool = False,
    return_seq: bool = False,
) -> np.ndarray:
    """Evaluate a set of prompts using a given predictor.

    Args:
        prompts (List[str]): List of prompts to evaluate.
        predictor (BasePredictor): Predictor to use for evaluation.
        n_samples (int, optional): Number of samples to use if subsampling. Defaults to 20.
        subsample (bool, optional): Whether to use subsampling. If set to True,
            a different subset is sampled per call. Defaults to False.
        return_seq (bool, optional): Whether to also return the generated sequences. Defaults to False.

    Returns:
        np.ndarray: Array of scores (one per prompt), computed with the task's metric.
            If return_seq is True, a tuple of (scores, sequences) is returned instead.
    """
    if isinstance(prompts, str):
        prompts = [prompts]
    # Randomly select a subsample of n_samples
    if subsample:
        indices = np.random.choice(len(self.xs), n_samples, replace=False)
    else:
        indices = np.arange(len(self.xs))

    xs_subsample = self.xs[indices]
    ys_subsample = self.ys[indices]

    # Make predictions on the subsample
    preds = predictor.predict(prompts, xs_subsample, return_seq=return_seq)

    if return_seq:
        preds, seqs = preds

    scores = np.array([self.metric(ys_subsample, pred) for pred in preds])

    if return_seq:
        return scores, seqs

    return scores
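
Because subsample=True draws a fresh random subset of n_samples rows on every call, repeated evaluations of the same prompt can yield different scores; with return_seq=True the call also returns the generated sequences. A usage sketch, assuming task and predictor are already constructed as above:

prompts = ["Classify the review as positive or negative."]

# Evaluate on the full dataset: one metric value per prompt.
scores = task.evaluate(prompts, predictor)

# Evaluate on a random subset of 20 examples and also return the raw sequences.
scores, seqs = task.evaluate(prompts, predictor, n_samples=20, subsample=True, return_seq=True)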