from collections import Counter
from typing import Callable, List, Optional, Tuple, Union
import numpy as np
from .metric import Metric
from .task import Task
def random_policy(task: Task, metric: Optional[Union[str, Metric]] = None) -> str:
    """Pick a model uniformly at random from the task's result table.

    Args:
        task: The target task; its ``results`` keys are the candidate models.
        metric: Unused; accepted so all policies share one signature.

    Returns:
        The name of the randomly chosen model.
    """
    candidates = list(task.results.keys())
    return np.random.choice(candidates)
def stderr_policy(task: Task, metric: Optional[Union[str, Metric]] = None) -> str:
    """Pick the model whose mean estimate has the largest standard error.

    Args:
        task: The target task providing per-model ``(mean, variance, count)``
            statistics via ``model_to_mu_var_n``.
        metric: The metric whose statistics are consulted.

    Returns:
        The model with the largest ``sqrt(var / n)``; ties on the standard
        error are broken by the lexicographically larger model name.
    """
    stats = task.model_to_mu_var_n(metric)
    ranked = [(np.sqrt(var / n), name) for name, (_, var, n) in stats.items()]
    return max(ranked)[1]
def ucb_policy(task: Task, metric: Optional[Union[str, Metric]] = None) -> str:
    """Pick the model with the largest upper confidence bound.

    The bound used here is ``mu + sqrt(var)``, i.e. one standard deviation
    above the estimated mean (the sample count is not consulted).

    Args:
        task: The target task providing per-model ``(mean, variance, count)``
            statistics via ``model_to_mu_var_n``.
        metric: The metric whose statistics are consulted.

    Returns:
        The model with the largest bound; ties on the bound are broken by
        the lexicographically larger model name.
    """
    stats = task.model_to_mu_var_n(metric)
    scored = ((mu + np.sqrt(var), name) for name, (mu, var, _) in stats.items())
    return max(scored)[1]
def power_policy(task: Task, metric: Optional[Union[str, Metric]] = None) -> str:
    """Pick the model needing the most samples to achieve statistical power.

    For every ordered pair of distinct models the task reports how many
    samples each side of the comparison needs; those counts are summed per
    model and the model with the largest total is returned.

    Bug fixed: the original zeroed ``model_to_samples[modelA]`` *inside*
    the outer loop, which discarded any counts a model had already
    accumulated while acting as ``modelB`` in earlier iterations — making
    the result depend on dict iteration order. All models are now
    initialized to zero once, before any accumulation.

    Args:
        task: The target task; must provide ``results`` (model -> samples)
            and ``samples_to_achieve_power(modelA, modelB, metric)``.
        metric: The metric used for the power computation.

    Returns:
        The model with the largest accumulated sample requirement.
    """
    # Seed every model with 0 so most_common() sees all of them even when
    # no additional samples are required anywhere.
    model_to_samples: Counter = Counter({model: 0 for model in task.results})
    for modelA in task.results:
        for modelB in task.results:
            if modelA == modelB:
                continue
            nA, nB = task.samples_to_achieve_power(modelA, modelB, metric)
            model_to_samples[modelA] += nA
            model_to_samples[modelB] += nB
    model, _ = model_to_samples.most_common(1)[0]
    return model
class Selector:
    """This class implements methods for selecting models to run.

    Given a target metric, the Selector class provides methods for selecting
    models to run. The default policy is to:

    1. Obtain ``min_samples=3`` for each model.
    2. Sample from ``policies``:

       - (p=0.25) Sample a random model.
       - (p=0.25) Select the model with the largest standard error.
       - (p=0.25) Select the model with the largest number of samples
         needed to achieve power.
       - (p=0.25) Select the model with the largest upper confidence bound.

    Attributes:
        task: The target task.
        policies: A list of tuples containing a policy and the probability of
            selecting that policy.
        min_samples: The minimum number of samples required for each model
            before the randomized policies are applied.
    """

    DEFAULT_POLICY = [
        (random_policy, 0.25),
        (stderr_policy, 0.25),
        (power_policy, 0.25),
        (ucb_policy, 0.25),
    ]

    def __init__(
        self,
        task: Task,
        policies: Optional[List[Tuple[Callable, float]]] = None,
        min_samples: int = 3,
    ):
        """Initialize the selector.

        Args:
            task: The target task.
            policies: Optional ``(policy, probability)`` tuples; any falsy
                value (``None`` or ``[]``, preserving the old behavior)
                falls back to ``DEFAULT_POLICY``.
            min_samples: Minimum samples per model before randomized
                policies kick in.
        """
        self.task = task
        self.min_samples = min_samples
        # `None` default replaces the original mutable `[]` default
        # argument (shared across calls); falsy still means "use default".
        self.policies = policies if policies else self.DEFAULT_POLICY

    def propose(self, metric: Optional[Union[str, Metric]] = None) -> str:
        """This selects a model to execute.

        Any model with fewer than ``min_samples`` results is returned
        first (in task order); otherwise one policy is drawn according to
        its probability and applied.

        Args:
            metric: The target metric to optimize.

        Returns:
            The model to execute.
        """
        for model, results in self.task.results.items():
            if len(results) < self.min_samples:
                return model
        policies, probabilities = zip(*self.policies)
        policy = np.random.choice(policies, p=probabilities)  # type: ignore
        return policy(self.task, metric)