Optimizers

Module for prompt optimizers.

base_optimizer

Base module for optimizers in the promptolution library.

BaseOptimizer

Bases: ABC

Abstract base class for prompt optimizers.

This class defines the basic structure and interface for prompt optimization algorithms.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| config | ExperimentConfig | Configuration for the optimizer, overriding defaults. |
| prompts | List[str] | List of current prompts being optimized. |
| task | BaseTask | The task object used for evaluating prompts. |
| callbacks | List[Callable] | List of callback functions to be called during optimization. |
| predictor | | The predictor used for prompt evaluation (if applicable). |

Source code in promptolution/optimizers/base_optimizer.py
class BaseOptimizer(ABC):
    """Abstract base class for prompt optimizers.

    This class defines the basic structure and interface for prompt optimization algorithms.

    Attributes:
        config (ExperimentConfig, optional): Configuration for the optimizer, overriding defaults.
        prompts (List[str]): List of current prompts being optimized.
        task (BaseTask): The task object used for evaluating prompts.
        callbacks (List[Callable]): List of callback functions to be called during optimization.
        predictor: The predictor used for prompt evaluation (if applicable).
    """

    def __init__(
        self,
        predictor,
        task: "BaseTask",
        initial_prompts: List[str],
        callbacks: List[Callable] = None,
        config: "ExperimentConfig" = None,
    ):
        """Initialize the optimizer with a configuration and/or direct parameters.

        Args:
            initial_prompts: Initial set of prompts to start optimization with.
            task: Task object for prompt evaluation.
            callbacks: List of callback functions.
            predictor: Predictor for prompt evaluation.
            config (ExperimentConfig, optional): Configuration for the optimizer, overriding defaults.
        """
        # Set up optimizer state
        self.prompts = initial_prompts
        self.task = task
        self.callbacks = callbacks or []
        self.predictor = predictor

        if config is not None:
            config.apply_to(self)

        self.config = config

    def optimize(self, n_steps: int) -> List[str]:
        """Perform the optimization process.

        This default implementation runs the shared optimization loop; concrete optimizer
        classes define per-step behavior in _step and setup in _pre_optimization_loop.

        Args:
            n_steps (int): Number of optimization steps to perform.

        Returns:
            The optimized list of prompts after all steps.
        """
        # validate config
        if self.config is not None:
            self.config.validate()
        self._pre_optimization_loop()

        for _ in range(n_steps):
            try:
                self.prompts = self._step()
            except Exception as e:
                # exit training loop and gracefully fail
                logger.error(f"⛔ Error during optimization step: {e}")
                logger.error("⚠️ Exiting optimization loop.")
                break

            # Callbacks at the end of each step
            continue_optimization = self._on_step_end()
            if not continue_optimization:
                break

        self._on_train_end()

        return self.prompts

    @abstractmethod
    def _pre_optimization_loop(self):
        """Prepare for the optimization loop.

        This method should be implemented by concrete optimizer classes to define
        any setup required before the optimization loop starts.
        """
        pass

    @abstractmethod
    def _step(self) -> List[str]:
        """Perform a single optimization step.

        This method should be implemented by concrete optimizer classes to define
        the specific optimization step.

        Returns:
            The optimized list of prompts after the step.
        """
        pass

    def _on_step_end(self):
        """Call all registered callbacks at the end of each optimization step."""
        continue_optimization = True
        for callback in self.callbacks:
            continue_optimization &= callback.on_step_end(self)  # if any callback returns False, end the optimization

        return continue_optimization

    def _on_train_end(self):
        """Call all registered callbacks at the end of the entire optimization process."""
        for callback in self.callbacks:
            callback.on_train_end(self)
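
To illustrate the interface, here is a minimal sketch of a hypothetical concrete subclass. It assumes the task's evaluate method accepts a list of prompts, a predictor, and return_agg_scores=True, as in the optimizers documented below; the class name and strategy are illustrative only.

class GreedyHalvingOptimizer(BaseOptimizer):
    """Hypothetical optimizer: re-evaluate the prompts each step and keep the better half."""

    def _pre_optimization_loop(self):
        # Score the initial prompts once before the loop starts.
        self.scores = self.task.evaluate(self.prompts, self.predictor, return_agg_scores=True)

    def _step(self) -> List[str]:
        # Re-evaluate all prompts, then keep the top-scoring half (at least one prompt).
        scores = self.task.evaluate(self.prompts, self.predictor, return_agg_scores=True)
        keep = max(1, len(self.prompts) // 2)
        order = sorted(range(len(self.prompts)), key=lambda i: scores[i], reverse=True)[:keep]
        self.prompts = [self.prompts[i] for i in order]
        return self.prompts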

__init__(predictor, task, initial_prompts, callbacks=None, config=None)

Initialize the optimizer with a configuration and/or direct parameters.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| initial_prompts | List[str] | Initial set of prompts to start optimization with. | required |
| task | BaseTask | Task object for prompt evaluation. | required |
| callbacks | List[Callable] | List of callback functions. | None |
| predictor | | Predictor for prompt evaluation. | required |
| config | ExperimentConfig | Configuration for the optimizer, overriding defaults. | None |
Source code in promptolution/optimizers/base_optimizer.py
def __init__(
    self,
    predictor,
    task: "BaseTask",
    initial_prompts: List[str],
    callbacks: List[Callable] = None,
    config: "ExperimentConfig" = None,
):
    """Initialize the optimizer with a configuration and/or direct parameters.

    Args:
        initial_prompts: Initial set of prompts to start optimization with.
        task: Task object for prompt evaluation.
        callbacks: List of callback functions.
        predictor: Predictor for prompt evaluation.
        config (ExperimentConfig, optional): Configuration for the optimizer, overriding defaults.
    """
    # Set up optimizer state
    self.prompts = initial_prompts
    self.task = task
    self.callbacks = callbacks or []
    self.predictor = predictor

    if config is not None:
        config.apply_to(self)

    self.config = config

optimize(n_steps)

Perform the optimization process.

This default implementation runs the shared optimization loop; concrete optimizer classes define per-step behavior in _step and setup in _pre_optimization_loop.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| n_steps | int | Number of optimization steps to perform. | required |

Returns:

| Type | Description |
| --- | --- |
| List[str] | The optimized list of prompts after all steps. |

Source code in promptolution/optimizers/base_optimizer.py
def optimize(self, n_steps: int) -> List[str]:
    """Perform the optimization process.

    This default implementation runs the shared optimization loop; concrete optimizer
    classes define per-step behavior in _step and setup in _pre_optimization_loop.

    Args:
        n_steps (int): Number of optimization steps to perform.

    Returns:
        The optimized list of prompts after all steps.
    """
    # validate config
    if self.config is not None:
        self.config.validate()
    self._pre_optimization_loop()

    for _ in range(n_steps):
        try:
            self.prompts = self._step()
        except Exception as e:
            # exit training loop and gracefully fail
            logger.error(f"⛔ Error during optimization step: {e}")
            logger.error("⚠️ Exiting optimization loop.")
            break

        # Callbacks at the end of each step
        continue_optimization = self._on_step_end()
        if not continue_optimization:
            break

    self._on_train_end()

    return self.prompts
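
A typical call, sketched under the assumption that a concrete optimizer (here EvoPromptGA, documented below) and the task, predictor, meta_llm, and meta_prompt_template objects have already been constructed:

optimizer = EvoPromptGA(
    predictor=predictor,
    task=task,
    prompt_template=meta_prompt_template,  # crossover meta-prompt with <prompt1>/<prompt2> placeholders
    meta_llm=meta_llm,
    initial_prompts=[
        "Classify the sentiment of the text.",
        "Decide whether the review is positive or negative.",
    ],
)
best_prompts = optimizer.optimize(n_steps=10)  # returns the surviving prompt population, best first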

capo

Implementation of the CAPO (Cost-Aware Prompt Optimization) algorithm.

CAPO

Bases: BaseOptimizer

CAPO: Cost-Aware Prompt Optimization.

This class implements an evolutionary algorithm for optimizing prompts in large language models by incorporating racing techniques and multi-objective optimization. It uses crossover, mutation, and racing based on evaluation scores and statistical tests to improve efficiency while balancing performance with prompt length. It is adapted from the paper "CAPO: Cost-Aware Prompt Optimization" by Zehle et al., 2025.

Source code in promptolution/optimizers/capo.py
class CAPO(BaseOptimizer):
    """CAPO: Cost-Aware Prompt Optimization.

    This class implements an evolutionary algorithm for optimizing prompts in large language models
    by incorporating racing techniques and multi-objective optimization. It uses crossover, mutation,
    and racing based on evaluation scores and statistical tests to improve efficiency while balancing
    performance with prompt length. It is adapted from the paper "CAPO: Cost-Aware Prompt Optimization" by Zehle et al., 2025.
    """

    def __init__(
        self,
        predictor: "BasePredictor",
        task: "BaseTask",
        meta_llm: "BaseLLM",
        initial_prompts: List[str] = None,
        crossovers_per_iter: int = 4,
        upper_shots: int = 5,
        max_n_blocks_eval: int = 10,
        test_statistic: "TestStatistics" = "paired_t_test",
        alpha: float = 0.2,
        length_penalty: float = 0.05,
        df_few_shots: pd.DataFrame = None,
        crossover_template: str = None,
        mutation_template: str = None,
        callbacks: List[Callable] = [],
        config: "ExperimentConfig" = None,
    ):
        """Initializes the CAPOptimizer with various parameters for prompt evolution.

        Args:
            predictor (BasePredictor): The predictor for evaluating prompt performance.
            task (BaseTask): The task instance containing dataset and description.
            meta_llm (BaseLLM): The meta language model for crossover/mutation.
            initial_prompts (List[str]): Initial prompt instructions.
            crossovers_per_iter (int): Number of crossover operations per iteration.
            upper_shots (int): Maximum number of few-shot examples per prompt.
            max_n_blocks_eval (int): Maximum number of evaluation blocks.
            test_statistic (TestStatistics): Statistical test to compare prompt performance. Default is "paired_t_test".
            alpha (float): Significance level for the statistical test.
            length_penalty (float): Penalty factor for prompt length.
            df_few_shots (pd.DataFrame): DataFrame containing few-shot examples. If None, will pop 10% of datapoints from task.
            crossover_template (str, optional): Template for crossover instructions.
            mutation_template (str, optional): Template for mutation instructions.
            callbacks (List[Callable], optional): Callbacks for optimizer events.
            config (ExperimentConfig, optional): Configuration for the optimizer.
        """
        self.meta_llm = meta_llm
        self.downstream_llm = predictor.llm

        self.crossover_template = crossover_template or CAPO_CROSSOVER_TEMPLATE
        self.mutation_template = mutation_template or CAPO_MUTATION_TEMPLATE

        self.crossovers_per_iter = crossovers_per_iter
        self.upper_shots = upper_shots
        self.max_n_blocks_eval = max_n_blocks_eval
        self.test_statistic = get_test_statistic_func(test_statistic)
        self.alpha = alpha

        self.length_penalty = length_penalty
        self.token_counter = get_token_counter(self.downstream_llm)

        self.scores = np.empty(0)
        super().__init__(predictor, task, initial_prompts, callbacks, config)
        self.df_few_shots = df_few_shots if df_few_shots is not None else task.pop_datapoints(frac=0.1)
        if self.max_n_blocks_eval > self.task.n_blocks:
            logger.warning(
                f"ℹ️ max_n_blocks_eval ({self.max_n_blocks_eval}) is larger than the number of blocks ({self.task.n_blocks})."
                f" Setting max_n_blocks_eval to {self.task.n_blocks}."
            )
            self.max_n_blocks_eval = self.task.n_blocks
        self.population_size = len(self.prompts)

        if hasattr(self.predictor, "begin_marker") and hasattr(self.predictor, "end_marker"):
            self.target_begin_marker = self.predictor.begin_marker
            self.target_end_marker = self.predictor.end_marker
        else:
            self.target_begin_marker = ""
            self.target_end_marker = ""

    def _initialize_population(self, initial_prompts: List[str]) -> List[CAPOPrompt]:
        """Initializes the population of Prompt objects from initial instructions.

        Args:
            initial_prompts (List[str]): List of initial prompt instructions.

        Returns:
            List[Prompt]: Initialized population of prompts with few-shot examples.
        """
        population = []
        for instruction_text in initial_prompts:
            num_examples = random.randint(0, self.upper_shots)
            few_shots = self._create_few_shot_examples(instruction_text, num_examples)
            population.append(CAPOPrompt(instruction_text, few_shots))

        return population

    def _create_few_shot_examples(self, instruction: str, num_examples: int) -> List[Tuple[str, str]]:
        if num_examples == 0:
            return []

        few_shot_samples = self.df_few_shots.sample(num_examples, replace=False)
        sample_inputs = few_shot_samples[self.task.x_column].values
        sample_targets = few_shot_samples[self.task.y_column].values
        few_shots = [
            CAPO_FEWSHOT_TEMPLATE.replace("<input>", i).replace(
                "<output>", f"{self.target_begin_marker}{t}{self.target_end_marker}"
            )
            for i, t in zip(sample_inputs, sample_targets)
        ]
        # Select partition of the examples to generate reasoning from downstream model
        preds, seqs = self.predictor.predict(
            [instruction] * num_examples,
            sample_inputs,
            return_seq=True,
        )

        # Check which predictions are correct and get a single one per example
        for j in range(num_examples):
            # Process and clean up the generated sequences
            seqs[j] = seqs[j].replace(sample_inputs[j], "").strip()
            # Check if the prediction is correct and add reasoning if so
            if preds[j] == sample_targets[j]:
                few_shots[j] = CAPO_FEWSHOT_TEMPLATE.replace("<input>", sample_inputs[j]).replace("<output>", seqs[j])

        return few_shots

    def _crossover(self, parents: List[CAPOPrompt]) -> List[CAPOPrompt]:
        """Performs crossover among parent prompts to generate offsprings.

        Args:
            parents (List[CAPOPrompt]): List of parent prompts.

        Returns:
            List[Prompt]: List of new offsprings after crossover.
        """
        crossover_prompts = []
        offspring_few_shots = []
        for _ in range(self.crossovers_per_iter):
            mother, father = random.sample(parents, 2)
            crossover_prompt = (
                self.crossover_template.replace("<mother>", mother.instruction_text)
                .replace("<father>", father.instruction_text)
                .strip()
            )
            # collect all crossover prompts then pass them bundled to the meta llm (speedup)
            crossover_prompts.append(crossover_prompt)
            combined_few_shots = mother.few_shots + father.few_shots
            num_few_shots = (len(mother.few_shots) + len(father.few_shots)) // 2
            offspring_few_shot = random.sample(combined_few_shots, num_few_shots)
            offspring_few_shots.append(offspring_few_shot)

        child_instructions = self.meta_llm.get_response(crossover_prompts)

        offsprings = []
        for instruction, examples in zip(child_instructions, offspring_few_shots):
            instruction = instruction.split("<prompt>")[-1].split("</prompt>")[0].strip()
            offsprings.append(CAPOPrompt(instruction, examples))

        return offsprings

    def _mutate(self, offsprings: List[CAPOPrompt]) -> List[CAPOPrompt]:
        """Apply mutation to offsprings to generate new candidate prompts.

        Args:
            offsprings (List[CAPOPrompt]): List of offsprings to mutate.

        Returns:
            List[Prompt]: List of mutated prompts.
        """
        # collect all mutation prompts then pass them bundled to the meta llm (speedup)
        mutation_prompts = [
            self.mutation_template.replace("<instruction>", prompt.instruction_text) for prompt in offsprings
        ]
        new_instructions = self.meta_llm.get_response(mutation_prompts)

        mutated = []
        for new_instruction, prompt in zip(new_instructions, offsprings):
            new_instruction = new_instruction.split("<prompt>")[-1].split("</prompt>")[0].strip()
            p = random.random()

            if p < 1 / 3 and len(prompt.few_shots) < self.upper_shots:  # add a random few shot
                new_few_shot = self._create_few_shot_examples(new_instruction, 1)
                new_few_shots = prompt.few_shots + new_few_shot
            elif 1 / 3 <= p < 2 / 3 and len(prompt.few_shots) > 0:  # remove a random few shot
                new_few_shots = random.sample(prompt.few_shots, len(prompt.few_shots) - 1)
            else:  # do not change few shots, but shuffle
                new_few_shots = prompt.few_shots

            random.shuffle(new_few_shots)
            mutated.append(CAPOPrompt(new_instruction, new_few_shots))

        return mutated

    def _do_racing(self, candidates: List[CAPOPrompt], k: int) -> List[CAPOPrompt]:
        """Perform the racing (selection) phase by comparing candidates based on their evaluation scores using the provided test statistic.

        Args:
            candidates (List[CAPOPrompt]): List of candidate prompts.
            k (int): Number of survivors to retain.

        Returns:
            List[Prompt]: List of surviving prompts after racing.
        """
        self.task.reset_block_idx()
        block_scores = []
        i = 0
        while len(candidates) > k and i < self.max_n_blocks_eval:
            # new_scores shape: (n_candidates, n_samples)
            new_scores = self.task.evaluate(
                [c.construct_prompt() for c in candidates], self.predictor, return_agg_scores=False
            )

            # subtract length penalty
            prompt_lengths = np.array([self.token_counter(c.construct_prompt()) for c in candidates])
            rel_prompt_lengths = prompt_lengths / self.max_prompt_length

            new_scores = new_scores - self.length_penalty * rel_prompt_lengths[:, None]
            block_scores.append(new_scores)
            scores = np.concatenate(block_scores, axis=1)

            # boolean matrix C_ij indicating if candidate j is better than candidate i
            comparison_matrix = np.array(
                [[self.test_statistic(other_score, score, self.alpha) for other_score in scores] for score in scores]
            )

            # Sum along rows to get number of better scores for each candidate
            n_better = np.sum(comparison_matrix, axis=1)

            # Create mask for survivors and filter candidates
            candidates = list(compress(candidates, n_better < k))
            block_scores = [bs[n_better < k] for bs in block_scores]

            i += 1
            self.task.increment_block_idx()

        avg_scores = self.task.evaluate(
            [c.construct_prompt() for c in candidates], self.predictor, eval_strategy="evaluated"
        )
        order = np.argsort(-avg_scores)[:k]
        candidates = [candidates[i] for i in order]
        self.scores = avg_scores[order]

        return candidates

    def _pre_optimization_loop(self):
        self.prompt_objects = self._initialize_population(self.prompts)
        self.prompts = [p.construct_prompt() for p in self.prompt_objects]
        self.max_prompt_length = max(self.token_counter(p) for p in self.prompts)
        self.task.reset_block_idx()

    def _step(self) -> List[str]:
        """Perform a single optimization step.

        Returns:
            List[str]: The optimized list of prompts after the step.
        """
        offsprings = self._crossover(self.prompt_objects)
        mutated = self._mutate(offsprings)
        combined = self.prompt_objects + mutated

        self.prompt_objects = self._do_racing(combined, self.population_size)
        self.prompts = [p.construct_prompt() for p in self.prompt_objects]

        return self.prompts
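
A hedged instantiation sketch; the argument values are illustrative only, and predictor, task, meta_llm, and initial_prompts are assumed to exist:

capo = CAPO(
    predictor=predictor,            # downstream predictor; its LLM is also used for token counting
    task=task,                      # task providing blocked evaluation and the few-shot data columns
    meta_llm=meta_llm,              # LLM that answers the crossover and mutation meta-prompts
    initial_prompts=initial_prompts,
    upper_shots=3,                  # at most three few-shot examples per prompt
    length_penalty=0.05,            # small score penalty proportional to relative prompt length
    alpha=0.2,                      # significance level for the racing test statistic
)
optimized_prompts = capo.optimize(n_steps=8)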

__init__(predictor, task, meta_llm, initial_prompts=None, crossovers_per_iter=4, upper_shots=5, max_n_blocks_eval=10, test_statistic='paired_t_test', alpha=0.2, length_penalty=0.05, df_few_shots=None, crossover_template=None, mutation_template=None, callbacks=[], config=None)

Initializes the CAPO optimizer with various parameters for prompt evolution.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| predictor | BasePredictor | The predictor for evaluating prompt performance. | required |
| task | BaseTask | The task instance containing dataset and description. | required |
| meta_llm | BaseLLM | The meta language model for crossover/mutation. | required |
| initial_prompts | List[str] | Initial prompt instructions. | None |
| crossovers_per_iter | int | Number of crossover operations per iteration. | 4 |
| upper_shots | int | Maximum number of few-shot examples per prompt. | 5 |
| max_n_blocks_eval | int | Maximum number of evaluation blocks. | 10 |
| test_statistic | TestStatistics | Statistical test to compare prompt performance. | 'paired_t_test' |
| alpha | float | Significance level for the statistical test. | 0.2 |
| length_penalty | float | Penalty factor for prompt length. | 0.05 |
| df_few_shots | DataFrame | DataFrame containing few-shot examples. If None, pops 10% of datapoints from the task. | None |
| crossover_template | str | Template for crossover instructions. | None |
| mutation_template | str | Template for mutation instructions. | None |
| callbacks | List[Callable] | Callbacks for optimizer events. | [] |
| config | ExperimentConfig | Configuration for the optimizer. | None |
Source code in promptolution/optimizers/capo.py
def __init__(
    self,
    predictor: "BasePredictor",
    task: "BaseTask",
    meta_llm: "BaseLLM",
    initial_prompts: List[str] = None,
    crossovers_per_iter: int = 4,
    upper_shots: int = 5,
    max_n_blocks_eval: int = 10,
    test_statistic: "TestStatistics" = "paired_t_test",
    alpha: float = 0.2,
    length_penalty: float = 0.05,
    df_few_shots: pd.DataFrame = None,
    crossover_template: str = None,
    mutation_template: str = None,
    callbacks: List[Callable] = [],
    config: "ExperimentConfig" = None,
):
    """Initializes the CAPOptimizer with various parameters for prompt evolution.

    Args:
        predictor (BasePredictor): The predictor for evaluating prompt performance.
        task (BaseTask): The task instance containing dataset and description.
        meta_llm (BaseLLM): The meta language model for crossover/mutation.
        initial_prompts (List[str]): Initial prompt instructions.
        crossovers_per_iter (int): Number of crossover operations per iteration.
        upper_shots (int): Maximum number of few-shot examples per prompt.
        max_n_blocks_eval (int): Maximum number of evaluation blocks.
        test_statistic (TestStatistics): Statistical test to compare prompt performance. Default is "paired_t_test".
        alpha (float): Significance level for the statistical test.
        length_penalty (float): Penalty factor for prompt length.
        df_few_shots (pd.DataFrame): DataFrame containing few-shot examples. If None, will pop 10% of datapoints from task.
        crossover_template (str, optional): Template for crossover instructions.
        mutation_template (str, optional): Template for mutation instructions.
        callbacks (List[Callable], optional): Callbacks for optimizer events.
        config (ExperimentConfig, optional): Configuration for the optimizer.
    """
    self.meta_llm = meta_llm
    self.downstream_llm = predictor.llm

    self.crossover_template = crossover_template or CAPO_CROSSOVER_TEMPLATE
    self.mutation_template = mutation_template or CAPO_MUTATION_TEMPLATE

    self.crossovers_per_iter = crossovers_per_iter
    self.upper_shots = upper_shots
    self.max_n_blocks_eval = max_n_blocks_eval
    self.test_statistic = get_test_statistic_func(test_statistic)
    self.alpha = alpha

    self.length_penalty = length_penalty
    self.token_counter = get_token_counter(self.downstream_llm)

    self.scores = np.empty(0)
    super().__init__(predictor, task, initial_prompts, callbacks, config)
    self.df_few_shots = df_few_shots if df_few_shots is not None else task.pop_datapoints(frac=0.1)
    if self.max_n_blocks_eval > self.task.n_blocks:
        logger.warning(
            f"ℹ️ max_n_blocks_eval ({self.max_n_blocks_eval}) is larger than the number of blocks ({self.task.n_blocks})."
            f" Setting max_n_blocks_eval to {self.task.n_blocks}."
        )
        self.max_n_blocks_eval = self.task.n_blocks
    self.population_size = len(self.prompts)

    if hasattr(self.predictor, "begin_marker") and hasattr(self.predictor, "end_marker"):
        self.target_begin_marker = self.predictor.begin_marker
        self.target_end_marker = self.predictor.end_marker
    else:
        self.target_begin_marker = ""
        self.target_end_marker = ""

CAPOPrompt

Represents a prompt consisting of an instruction and few-shot examples.

Source code in promptolution/optimizers/capo.py
class CAPOPrompt:
    """Represents a prompt consisting of an instruction and few-shot examples."""

    def __init__(self, instruction_text: str, few_shots: List[str]):
        """Initializes the Prompt with an instruction and associated examples.

        Args:
            instruction_text (str): The instruction or prompt text.
            few_shots (List[str]): List of examples as strings.
        """
        self.instruction_text = instruction_text.strip()
        self.few_shots = few_shots

    def construct_prompt(self) -> str:
        """Constructs the full prompt string by replacing placeholders in the template with the instruction and formatted examples.

        Returns:
            str: The constructed prompt string.
        """
        few_shot_str = "\n\n".join(self.few_shots).strip()
        prompt = (
            CAPO_DOWNSTREAM_TEMPLATE.replace("<instruction>", self.instruction_text)
            .replace("<few_shots>", few_shot_str)
            .replace("\n\n\n\n", "\n\n")  # replace extra newlines if no few shots are provided
            .strip()
        )
        return prompt

    def __str__(self):
        """Returns the string representation of the prompt."""
        return self.construct_prompt()
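
A small usage sketch; the few-shot strings below are freeform illustrations, whereas CAPO itself builds them from CAPO_FEWSHOT_TEMPLATE:

prompt = CAPOPrompt(
    instruction_text="Classify the sentiment of the input text as positive or negative.",
    few_shots=[
        "Input: I loved this movie!\nOutput: positive",
        "Input: The plot was dull and predictable.\nOutput: negative",
    ],
)
print(prompt.construct_prompt())  # instruction and few-shot block inserted into CAPO_DOWNSTREAM_TEMPLATE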

__init__(instruction_text, few_shots)

Initializes the Prompt with an instruction and associated examples.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| instruction_text | str | The instruction or prompt text. | required |
| few_shots | List[str] | List of examples as strings. | required |
Source code in promptolution/optimizers/capo.py
def __init__(self, instruction_text: str, few_shots: List[str]):
    """Initializes the Prompt with an instruction and associated examples.

    Args:
        instruction_text (str): The instruction or prompt text.
        few_shots (List[str]): List of examples as strings.
    """
    self.instruction_text = instruction_text.strip()
    self.few_shots = few_shots

__str__()

Returns the string representation of the prompt.

Source code in promptolution/optimizers/capo.py
def __str__(self):
    """Returns the string representation of the prompt."""
    return self.construct_prompt()

construct_prompt()

Constructs the full prompt string by replacing placeholders in the template with the instruction and formatted examples.

Returns:

| Type | Description |
| --- | --- |
| str | The constructed prompt string. |

Source code in promptolution/optimizers/capo.py
def construct_prompt(self) -> str:
    """Constructs the full prompt string by replacing placeholders in the template with the instruction and formatted examples.

    Returns:
        str: The constructed prompt string.
    """
    few_shot_str = "\n\n".join(self.few_shots).strip()
    prompt = (
        CAPO_DOWNSTREAM_TEMPLATE.replace("<instruction>", self.instruction_text)
        .replace("<few_shots>", few_shot_str)
        .replace("\n\n\n\n", "\n\n")  # replace extra newlines if no few shots are provided
        .strip()
    )
    return prompt

evoprompt_de

Module for EvoPromptDE optimizer.

EvoPromptDE

Bases: BaseOptimizer

EvoPromptDE: Differential Evolution-based Prompt Optimizer.

This class implements a differential evolution algorithm for optimizing prompts in large language models. It is adapted from the paper "Connecting Large Language Models with Evolutionary Algorithms Yields Powerful Prompt Optimizers" by Guo et al., 2023.

The optimizer uses a differential evolution strategy to generate new prompts from existing ones, with an option to use the current best prompt as a donor.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| prompt_template | str | Template for generating meta-prompts during evolution. |
| donor_random | bool | If False, uses the current best prompt as a donor; if True, uses a random prompt. |
| meta_llm | | Language model used for generating child prompts from meta-prompts. |

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| prompt_template | str | Template for meta-prompts. | required |
| meta_llm | BaseLLM | Language model for child prompt generation. | required |
| donor_random | bool | Whether to use a random donor. Defaults to False. | False |
| config | ExperimentConfig | Configuration for the optimizer, overriding defaults. | None |
Source code in promptolution/optimizers/evoprompt_de.py
class EvoPromptDE(BaseOptimizer):
    """EvoPromptDE: Differential Evolution-based Prompt Optimizer.

    This class implements a differential evolution algorithm for optimizing prompts in large language models.
    It is adapted from the paper "Connecting Large Language Models with Evolutionary Algorithms
    Yields Powerful Prompt Optimizers" by Guo et al., 2023.

    The optimizer uses a differential evolution strategy to generate new prompts from existing ones,
    with an option to use the current best prompt as a donor.

    Attributes:
        prompt_template (str): Template for generating meta-prompts during evolution.
        donor_random (bool): If False, uses the current best prompt as a donor; if True, uses a random prompt.
        meta_llm: Language model used for generating child prompts from meta-prompts.

    Args:
        prompt_template (str): Template for meta-prompts.
        meta_llm: Language model for child prompt generation.
        donor_random (bool, optional): Whether to use a random donor. Defaults to False.
        config (ExperimentConfig, optional): Configuration for the optimizer, overriding defaults.
    """

    def __init__(
        self,
        predictor: "BasePredictor",
        task: "BaseTask",
        prompt_template: str,
        meta_llm: "BaseLLM",
        initial_prompts: List[str] = None,
        donor_random: bool = False,
        callbacks: List["BaseCallback"] = None,
        config: "ExperimentConfig" = None,
    ):
        """Initialize the EvoPromptDE optimizer."""
        self.prompt_template = prompt_template
        self.donor_random = donor_random
        self.meta_llm = meta_llm
        super().__init__(
            predictor=predictor, task=task, initial_prompts=initial_prompts, callbacks=callbacks, config=config
        )

    def _pre_optimization_loop(self):
        self.scores = self.task.evaluate(self.prompts, self.predictor, return_agg_scores=True)
        self.prompts = [prompt for _, prompt in sorted(zip(self.scores, self.prompts), reverse=True)]
        self.scores = sorted(self.scores, reverse=True)

    def _step(self) -> List[str]:
        """Perform the optimization process for a specified number of steps.

        This method iteratively improves the prompts using a differential evolution strategy.
        It evaluates prompts, generates new prompts using the DE algorithm, and replaces
        prompts if the new ones perform better.


        Returns:
            List[str]: The optimized list of prompts after all steps.
        """
        cur_best = self.prompts[0]
        meta_prompts = []
        for i in range(len(self.prompts)):
            # create meta prompts
            old_prompt = self.prompts[i]

            candidates = [prompt for prompt in self.prompts if prompt != old_prompt]
            a, b, c = np.random.choice(candidates, size=3, replace=False)

            if not self.donor_random:
                c = cur_best

            meta_prompt = (
                self.prompt_template.replace("<prompt0>", old_prompt)
                .replace("<prompt1>", a)
                .replace("<prompt2>", b)
                .replace("<prompt3>", c)
            )

            meta_prompts.append(meta_prompt)

        child_prompts = self.meta_llm.get_response(meta_prompts)
        child_prompts = [prompt.split("<prompt>")[-1].split("</prompt>")[0].strip() for prompt in child_prompts]

        child_scores = self.task.evaluate(child_prompts, self.predictor, return_agg_scores=True)

        for i in range(len(self.prompts)):
            if child_scores[i] > self.scores[i]:
                self.prompts[i] = child_prompts[i]
                self.scores[i] = child_scores[i]

        self.prompts = [prompt for _, prompt in sorted(zip(self.scores, self.prompts), reverse=True)]
        self.scores = sorted(self.scores, reverse=True)

        return self.prompts
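
A sketch of how the DE template is wired up; the placeholder names <prompt0> through <prompt3> and the <prompt></prompt> answer tags are taken from the code above, while the template wording itself is purely illustrative, and predictor, task, meta_llm, and initial_prompts are assumed to exist:

de_template = (
    "Base prompt: <prompt0>\n"
    "Donor prompts:\n1. <prompt1>\n2. <prompt2>\n3. <prompt3>\n"
    "Combine their differences into one improved prompt and wrap it in <prompt></prompt> tags."
)

optimizer = EvoPromptDE(
    predictor=predictor,
    task=task,
    prompt_template=de_template,
    meta_llm=meta_llm,
    initial_prompts=initial_prompts,
    donor_random=False,  # use the current best prompt as donor <prompt3>
)
prompts = optimizer.optimize(n_steps=5)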

__init__(predictor, task, prompt_template, meta_llm, initial_prompts=None, donor_random=False, callbacks=None, config=None)

Initialize the EvoPromptDE optimizer.

Source code in promptolution/optimizers/evoprompt_de.py
def __init__(
    self,
    predictor: "BasePredictor",
    task: "BaseTask",
    prompt_template: str,
    meta_llm: "BaseLLM",
    initial_prompts: List[str] = None,
    donor_random: bool = False,
    callbacks: List["BaseCallback"] = None,
    config: "ExperimentConfig" = None,
):
    """Initialize the EvoPromptDE optimizer."""
    self.prompt_template = prompt_template
    self.donor_random = donor_random
    self.meta_llm = meta_llm
    super().__init__(
        predictor=predictor, task=task, initial_prompts=initial_prompts, callbacks=callbacks, config=config
    )

evoprompt_ga

Module for EvoPromptGA optimizer.

EvoPromptGA

Bases: BaseOptimizer

EvoPromptGA: Genetic Algorithm-based Prompt Optimizer.

This class implements a genetic algorithm for optimizing prompts in large language models. It is adapted from the paper "Connecting Large Language Models with Evolutionary Algorithms Yields Powerful Prompt Optimizers" by Guo et al., 2023.

The optimizer uses crossover operations to generate new prompts from existing ones, with different selection methods available for choosing parent prompts.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| prompt_template | str | Template for generating meta-prompts during crossover. |
| meta_llm | | Language model used for generating child prompts from meta-prompts. |
| selection_mode | str | Method for selecting parent prompts ('random', 'wheel', or 'tour'). |

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| prompt_template | str | Template for meta-prompts. | required |
| meta_llm | BaseLLM | Language model for child prompt generation. | required |
| selection_mode | str | Parent selection method. Defaults to "wheel". | 'wheel' |

Raises:

| Type | Description |
| --- | --- |
| AssertionError | If an invalid selection mode is provided. |

Source code in promptolution/optimizers/evoprompt_ga.py
class EvoPromptGA(BaseOptimizer):
    """EvoPromptGA: Genetic Algorithm-based Prompt Optimizer.

    This class implements a genetic algorithm for optimizing prompts in large language models.
    It is adapted from the paper "Connecting Large Language Models with Evolutionary Algorithms
    Yields Powerful Prompt Optimizers" by Guo et al., 2023.

    The optimizer uses crossover operations to generate new prompts from existing ones,
    with different selection methods available for choosing parent prompts.

    Attributes:
        prompt_template (str): Template for generating meta-prompts during crossover.
        meta_llm: Language model used for generating child prompts from meta-prompts.
        selection_mode (str): Method for selecting parent prompts ('random', 'wheel', or 'tour').

    Args:
        prompt_template (str): Template for meta-prompts.
        meta_llm: Language model for child prompt generation.
        selection_mode (str, optional): Parent selection method. Defaults to "wheel".

    Raises:
        AssertionError: If an invalid selection mode is provided.
    """

    def __init__(
        self,
        predictor: "BasePredictor",
        task: "BaseTask",
        prompt_template: str,
        meta_llm: "BaseLLM",
        initial_prompts: List[str] = None,
        selection_mode: str = "wheel",
        callbacks: List["BaseCallback"] = None,
        config: "ExperimentConfig" = None,
    ):
        """Initialize the EvoPromptGA optimizer."""
        self.prompt_template = prompt_template
        self.meta_llm = meta_llm
        self.selection_mode = selection_mode
        super().__init__(
            predictor=predictor, initial_prompts=initial_prompts, task=task, callbacks=callbacks, config=config
        )
        assert self.selection_mode in ["random", "wheel", "tour"], "Invalid selection mode."

    def _pre_optimization_loop(self):
        self.scores = self.task.evaluate(self.prompts, self.predictor, return_agg_scores=True).tolist()
        # sort prompts by score
        self.prompts = [prompt for _, prompt in sorted(zip(self.scores, self.prompts), reverse=True)]
        self.scores = sorted(self.scores, reverse=True)

    def _step(self) -> List[str]:
        new_prompts = self._crossover(self.prompts, self.scores)
        prompts = self.prompts + new_prompts

        new_scores = self.task.evaluate(new_prompts, self.predictor, return_agg_scores=True).tolist()

        scores = self.scores + new_scores

        # sort scores and prompts
        self.prompts = [prompt for _, prompt in sorted(zip(scores, prompts), reverse=True)][: len(self.prompts)]
        self.scores = sorted(scores, reverse=True)[: len(self.prompts)]

        return self.prompts

    def _crossover(self, prompts, scores) -> List[str]:
        """Perform crossover operation to generate new child prompts.

        This method selects parent prompts based on the chosen selection mode,
        creates meta-prompts using the prompt template, and generates new child
        prompts using the meta language model.

        Args:
            prompts (List[str]): List of current prompts.
            scores (List[float]): Corresponding scores for the prompts.

        Returns:
            List[str]: Newly generated child prompts.
        """
        # parent selection
        if self.selection_mode == "wheel":
            wheel_idx = np.random.choice(
                np.arange(0, len(prompts)),
                size=len(prompts),
                replace=True,
                p=np.array(scores) / np.sum(scores) if np.sum(scores) > 0 else np.ones(len(scores)) / len(scores),
            ).tolist()
            parent_pop = [self.prompts[idx] for idx in wheel_idx]

        elif self.selection_mode in ["random", "tour"]:
            parent_pop = self.prompts

        # crossover
        meta_prompts = []
        for _ in self.prompts:
            if self.selection_mode in ["random", "wheel"]:
                parent_1, parent_2 = np.random.choice(parent_pop, size=2, replace=False)
            elif self.selection_mode == "tour":
                group_1 = np.random.choice(parent_pop, size=2, replace=False)
                group_2 = np.random.choice(parent_pop, size=2, replace=False)
                # use the best of each group based on scores
                parent_1 = group_1[np.argmax([self.scores[self.prompts.index(p)] for p in group_1])]
                parent_2 = group_2[np.argmax([self.scores[self.prompts.index(p)] for p in group_2])]

            meta_prompt = self.prompt_template.replace("<prompt1>", parent_1).replace("<prompt2>", parent_2)
            meta_prompts.append(meta_prompt)

        child_prompts = self.meta_llm.get_response(meta_prompts)
        child_prompts = [prompt.split("<prompt>")[-1].split("</prompt>")[0].strip() for prompt in child_prompts]

        return child_prompts
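
For reference, the 'wheel' mode draws parents with probability proportional to score, falling back to uniform when the scores sum to zero; a standalone sketch of that selection step (the example scores are made up):

import numpy as np

prompts = ["p1", "p2", "p3", "p4"]
scores = np.array([0.62, 0.55, 0.48, 0.31])

# Fitness-proportional ("roulette wheel") selection probabilities.
probs = scores / scores.sum() if scores.sum() > 0 else np.ones(len(scores)) / len(scores)
parent_1, parent_2 = (prompts[i] for i in np.random.choice(len(prompts), size=2, replace=False, p=probs))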

__init__(predictor, task, prompt_template, meta_llm, initial_prompts=None, selection_mode='wheel', callbacks=None, config=None)

Initialize the EvoPromptGA optimizer.

Source code in promptolution/optimizers/evoprompt_ga.py
def __init__(
    self,
    predictor: "BasePredictor",
    task: "BaseTask",
    prompt_template: str,
    meta_llm: "BaseLLM",
    initial_prompts: List[str] = None,
    selection_mode: str = "wheel",
    callbacks: List["BaseCallback"] = None,
    config: "ExperimentConfig" = None,
):
    """Initialize the EvoPromptGA optimizer."""
    self.prompt_template = prompt_template
    self.meta_llm = meta_llm
    self.selection_mode = selection_mode
    super().__init__(
        predictor=predictor, initial_prompts=initial_prompts, task=task, callbacks=callbacks, config=config
    )
    assert self.selection_mode in ["random", "wheel", "tour"], "Invalid selection mode."

opro

Module implementing the OPRO (Optimization by PROmpting) algorithm.

OPRO

Bases: BaseOptimizer

OPRO: Optimization by PROmpting.

Implementation of the technique proposed in "Large Language Models as Optimizers" (Yang et al., 2023: https://arxiv.org/abs/2309.03409).

OPRO works by providing a meta-LLM with task descriptions and previous prompt-score pairs to generate improved prompts for a downstream LLM.

Source code in promptolution/optimizers/opro.py
class OPRO(BaseOptimizer):
    """OPRO: Optimization by PROmpting.

    Implementation of the technique proposed in "Large Language Models as Optimizers"
    (Yang et al., 2023: https://arxiv.org/abs/2309.03409).

    OPRO works by providing a meta-LLM with task descriptions and previous
    prompt-score pairs to generate improved prompts for a downstream LLM.
    """

    def __init__(
        self,
        predictor: "BasePredictor",
        task: "BaseTask",
        prompt_template: Optional[str],
        meta_llm: "BaseLLM",
        initial_prompts: List[str] = None,
        max_num_instructions: int = 20,
        num_instructions_per_step: int = 8,
        num_few_shots: int = 3,
        callbacks: List["BaseCallback"] = None,
        config: "ExperimentConfig" = None,
    ) -> None:
        """Initialize the OPRO optimizer.

        Args:
            predictor: Predictor for prompt evaluation
            task: Task object for prompt evaluation
            meta_llm: LLM that generates improved prompts
            initial_prompts: Initial set of prompts to start optimization with
            prompt_template: Custom meta prompt template (uses OPRO_TEMPLATE if None)
            max_num_instructions: Maximum previous instructions to include in meta prompt
            num_instructions_per_step: Number of prompts to generate in each step
            num_few_shots: Number of few-shot examples to include (0 for none)
            callbacks: List of callback functions
            config: "ExperimentConfig" overwriting default parameters
        """
        self.meta_llm = meta_llm

        self.meta_prompt_template = prompt_template if prompt_template else OPRO_TEMPLATE
        self.max_num_instructions = max_num_instructions
        self.num_instructions_per_step = num_instructions_per_step
        self.num_few_shots = num_few_shots
        super().__init__(
            predictor=predictor, task=task, initial_prompts=initial_prompts, callbacks=callbacks, config=config
        )

    def _sample_examples(self) -> str:
        """Sample few-shot examples from the dataset.

        Returns:
            Formatted string of few-shot examples with inputs and expected outputs
        """
        idx = np.random.choice(len(self.task.xs), self.num_few_shots)
        sample_x = self.task.xs[idx]
        sample_y = self.task.ys[idx]

        return "\n".join([f"Input: {x}\nOutput: {y}" for x, y in zip(sample_x, sample_y)])

    def _format_instructions(self) -> str:
        """Format previous prompts and their scores for the meta prompt.

        Returns:
            Formatted string of previous prompts and their scores,
            sorted by ascending score (worse to better)
        """
        prompt_score_pairs = list(zip(self.prompts, self.scores))
        sorted_pairs = sorted(prompt_score_pairs, key=lambda x: x[1])

        return "".join([f"text:\n{prompt}\nscore: {int(100 * round(score, 2))}\n\n" for prompt, score in sorted_pairs])

    def _add_prompt_and_score(self, prompt: str, score: float) -> None:
        """Add a prompt and its score to the lists, maintaining max length.

        Args:
            prompt: The prompt to add
            score: The corresponding score for the prompt
        """
        if prompt in self.prompts:
            return

        self.prompts.append(prompt)
        self.scores.append(score)

        # Keep only the top-performing prompts if we exceed the maximum number of instructions
        keep_indices = np.argsort(self.scores)[-self.max_num_instructions :]
        self.prompts = [self.prompts[i] for i in keep_indices]
        self.scores = [self.scores[i] for i in keep_indices]

    def _pre_optimization_loop(self):
        self.scores = list(self.task.evaluate(self.prompts, self.predictor))
        self.meta_prompt = self.meta_prompt_template.replace("<instructions>", self._format_instructions()).replace(
            "<examples>", self._sample_examples()
        )

    def _step(self) -> List[str]:
        duplicate_prompts = 0
        for _ in range(self.num_instructions_per_step):
            generation_seed = np.random.randint(0, int(1e9))
            self.meta_llm.set_generation_seed(generation_seed)

            response = self.meta_llm.get_response([self.meta_prompt])[0]

            prompt = response.split("<prompt>")[-1].split("</prompt>")[0].strip()

            if prompt in self.prompts:
                duplicate_prompts += 1
                continue

            score = self.task.evaluate(prompt, self.predictor)[0]

            self._add_prompt_and_score(prompt, score)

        # Update meta prompt
        self.meta_prompt = self.meta_prompt_template.replace("<instructions>", self._format_instructions()).replace(
            "<examples>", self._sample_examples()
        )

        return self.prompts
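
The instruction block substituted for the <instructions> placeholder lists previous prompts with percent-scaled scores, ordered worst to best; a standalone sketch mirroring _format_instructions above (the prompts and scores are made up):

prompts = ["Answer the question.", "Think step by step, then answer."]
scores = [0.61, 0.74]

# Sort ascending so the best prompt appears last, then format as text/score pairs.
pairs = sorted(zip(prompts, scores), key=lambda x: x[1])
instructions = "".join(f"text:\n{p}\nscore: {int(100 * round(s, 2))}\n\n" for p, s in pairs)
print(instructions)  # this string replaces <instructions> in the meta prompt template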

__init__(predictor, task, prompt_template, meta_llm, initial_prompts=None, max_num_instructions=20, num_instructions_per_step=8, num_few_shots=3, callbacks=None, config=None)

Initialize the OPRO optimizer.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| predictor | BasePredictor | Predictor for prompt evaluation | required |
| task | BaseTask | Task object for prompt evaluation | required |
| meta_llm | BaseLLM | LLM that generates improved prompts | required |
| initial_prompts | List[str] | Initial set of prompts to start optimization with | None |
| prompt_template | Optional[str] | Custom meta prompt template (uses OPRO_TEMPLATE if None) | required |
| max_num_instructions | int | Maximum previous instructions to include in meta prompt | 20 |
| num_instructions_per_step | int | Number of prompts to generate in each step | 8 |
| num_few_shots | int | Number of few-shot examples to include (0 for none) | 3 |
| callbacks | List[BaseCallback] | List of callback functions | None |
| config | ExperimentConfig | ExperimentConfig overwriting default parameters | None |
Source code in promptolution/optimizers/opro.py
def __init__(
    self,
    predictor: "BasePredictor",
    task: "BaseTask",
    prompt_template: Optional[str],
    meta_llm: "BaseLLM",
    initial_prompts: List[str] = None,
    max_num_instructions: int = 20,
    num_instructions_per_step: int = 8,
    num_few_shots: int = 3,
    callbacks: List["BaseCallback"] = None,
    config: "ExperimentConfig" = None,
) -> None:
    """Initialize the OPRO optimizer.

    Args:
        predictor: Predictor for prompt evaluation
        task: Task object for prompt evaluation
        meta_llm: LLM that generates improved prompts
        initial_prompts: Initial set of prompts to start optimization with
        prompt_template: Custom meta prompt template (uses OPRO_TEMPLATE if None)
        max_num_instructions: Maximum previous instructions to include in meta prompt
        num_instructions_per_step: Number of prompts to generate in each step
        num_few_shots: Number of few-shot examples to include (0 for none)
        callbacks: List of callback functions
        config: "ExperimentConfig" overwriting default parameters
    """
    self.meta_llm = meta_llm

    self.meta_prompt_template = prompt_template if prompt_template else OPRO_TEMPLATE
    self.max_num_instructions = max_num_instructions
    self.num_instructions_per_step = num_instructions_per_step
    self.num_few_shots = num_few_shots
    super().__init__(
        predictor=predictor, task=task, initial_prompts=initial_prompts, callbacks=callbacks, config=config
    )

templates

Meta-prompt templates for different prompt optimization methods.