# Source code for bnelearn.util.metrics

"""This module implements metrics that may be interesting."""

from typing import Tuple
import torch
import matplotlib.pyplot as plt

from bnelearn.bidder import Bidder
from bnelearn.environment import AuctionEnvironment
from bnelearn.mechanism import Mechanism, TullockContest, CrowdsourcingContest
from bnelearn.strategy import Strategy
from bnelearn.util.tensor_util import apply_with_dynamic_mini_batching

# Maps each internal metric name to the tensorboard tag it is written under.
MAPPING_METRICS_TAGS = {
    # market outcomes
    'utilities': 'market/utilities',
    'efficiency': 'market/efficiency',
    'revenue': 'market/revenue',

    # learner diagnostics
    'update_norm': 'learner_info/update_norm',
    'gradient_norm': 'learner_info/gradient_norm',

    # utility-loss estimates
    'util_loss_ex_ante': 'eval/util_loss_ex_ante',
    'util_loss_ex_interim': 'eval/util_loss_ex_interim',
    'estimated_relative_ex_ante_util_loss': 'eval/estimated_relative_ex_ante_util_loss',

    # comparisons against a known BNE
    'utility_vs_bne': 'eval_vs_bne/utility_vs_bne',
    'epsilon_relative': 'eval_vs_bne/epsilon_relative',
    'epsilon_absolute': 'eval_vs_bne/epsilon_absolute',
    'L_2': 'eval_vs_bne/L_2',
    'L_inf': 'eval_vs_bne/L_inf',

    # bookkeeping
    'overhead_hours': 'meta/overhead_hours',

    # has a tag for completeness, but won't actually be logged
    'prev_params': 'learner_info/prev_params',
}

# LaTeX aliases of the tensorboard tags, used for plotting/publications.
# All values are raw strings: in a normal string literal `\t` (as in `\text`,
# `\theta`) and `\b` (as in `\beta`) are real escape sequences (TAB and
# BACKSPACE) and would silently corrupt the LaTeX markup.
ALIASES_LATEX = {
    'market/efficiency':           r'efficiency $\mathcal{E}$',
    'market/revenue':              r'revenue $\mathcal{R}$',
    'market/utilities':            r'ex-ante utility $\tilde{u}$',

    'eval/util_loss_ex_ante':    r'$\hat \ell$',
    'eval/util_loss_ex_interim': r'$\hat \epsilon$',
    'eval/estimated_relative_ex_ante_util_loss': r'approximate relative utility loss $\hat{\mathcal{L}}$',

    'eval_vs_bne/L_2':                  r'$L_2$',
    'eval_vs_bne/L_inf':                r'$L_\infty$',
    'eval_vs_bne/epsilon_absolute':     r'$\epsilon_\text{abs}$',
    'eval_vs_bne/epsilon_relative':     r'$\mathcal{L}$',
    'eval_vs_bne/utility_vs_bne':       r'$\hat u(\beta_i, \beta^*_{-i})$',

    'meta/overhead_hours':       r'$T$',

    # tag as actually emitted for the `update_norm` metric
    'learner_info/update_norm':           r'$|\Delta \theta|$',
    # legacy (misspelled) key, kept so existing lookups keep working
    'learning_info/update_norm':          r'$|\Delta \theta|$'
}


def norm_actions(b1: torch.Tensor, b2: torch.Tensor, p: float = 2) -> torch.Tensor:
    r"""Calculates the approximate "mean" Lp-norm between two action vectors.

    .. math::
        \left( \frac{1}{n} \sum_j |b_{1,j} - b_{2,j}|^p \right)^{1/p}

    where the sum runs over all tensor elements and :math:`n` is the size of
    the leading (batch) dimension.

    If p = Infty, this evaluates to the supremum (largest absolute difference).

    Args:
        b1: first action tensor.
        b2: second action tensor, must have the same shape as ``b1``.
        p: order of the norm; ``float('Inf')`` selects the supremum norm.

    Returns:
        Scalar tensor holding the norm, detached from the autograd graph.

    Raises:
        AssertionError: if the shapes of ``b1`` and ``b2`` differ.
    """
    assert b1.shape == b2.shape, "action tensors must have identical shapes"

    if p == float('Inf'):
        # detach here as well, so both branches disregard gradient info
        return (b1 - b2).abs().max().detach()

    # finite p
    n = float(b1.shape[0])

    # calc. norm & detach for disregarding any gradient info
    return (torch.dist(b1, b2, p=p) * (1. / n) ** (1 / p)).detach()

def norm_strategies(strategy1: Strategy, strategy2: Strategy, valuations: torch.Tensor, p: float=2) -> torch.Tensor:
    r"""Calculates the approximate "mean" :math:`L_p`-norm between two strategies
    approximated via Monte-Carlo integration on a sample of valuations that
    have been drawn according to the prior.

    The function :math:`L_p` norm is given by

    .. math::
        \left( \int_V |s_1(v) - s_2(v)|^p dv \right)^{1/p}.

    With Monte-Carlo integration this is approximated by

    .. math::
        \left( |V|/n \cdot \sum_i^n(|s_1(v_i) - s_2(v_i)|^p) \right)^{1/p}

    where :math:`|V|` is the volume of the set :math:`V`. Here, we ignore the volume. This
    gives us the RMSE for :math:`L_2`, supremum for :math:`L`-infty, etc.

    Args:
        strategy1: first strategy; its ``play`` method is applied to ``valuations``.
        strategy2: second strategy; its ``play`` method is applied to ``valuations``.
        valuations: sample of valuations both strategies are evaluated on.
        p: order of the norm, forwarded to :func:`norm_actions`.

    Returns:
        The value returned by :func:`norm_actions` for the two resulting
        action tensors.
    """
    b1 = strategy1.play(valuations)
    b2 = strategy2.play(valuations)

    return norm_actions(b1, b2, p)

def norm_strategy_and_actions(strategy, actions, valuations: torch.Tensor, p: float=2, componentwise=False) -> torch.Tensor:
    """Calculates the norm as in `norm_strategies`, but given one action
    vector and one strategy. The valuations must match the given actions.

    This helper function is useful when recalculating an action vector is prohibitive and it should be reused.

    Args:
        strategy: Strategy whose `play` method is evaluated on `valuations`.
        actions: torch.Tensor of precomputed actions to compare against.
        valuations: torch.Tensor
        p: float=2, order of the norm.
        componentwise: bool=False. If true, the norm is computed separately
            for each index of the last (action) dimension and only the
            smallest of these norms is returned.
    Returns:
        norm: (scalar Tensor)
    """
    s_actions = strategy.play(valuations)

    if not componentwise:
        return norm_actions(s_actions, actions, p)

    component_norms = [norm_actions(s_actions[..., d], actions[..., d], p)
                       for d in range(actions.shape[-1])]
    # select that component with the smallest norm
    return min(component_norms)

def _create_grid_bid_profiles(bidder_position: int, grid: torch.Tensor, bid_profile: torch.Tensor):
    """Given an original bid profile, creates a tensor of (grid_size *
    batch_size) batches of bid profiles, where for each original batch, the
    player's bid is replaced by each possible bid in the grid.

    Args:
        bidder_position: int - the player who's bids will be replaced
        grid: FloatTensor (grid_size x n_items): tensor of possible bids to
            be evaluated
        bid_profile: FloatTensor (batch_size x n_players x n_items)
    Returns:
        bid_profile: FloatTensor (grid_size*batch_size x n_players x
            n_items)
    """
    # version with size checks: (slower)
    # batch_size, _, n_items = bid_profile.shape #batch x player x item
    # n_candidates, n_items = candidate_bids.shape # candidates x item
    #assert n_items == n_items2, "input tensors don't match"

    batch_size, _, _ = bid_profile.shape #batch x player x item
    n_candidates, _ = grid.shape # candidates x item

    bid_profile = bid_profile.repeat(n_candidates, 1, 1)
    bid_profile[:, bidder_position, :] = grid.repeat_interleave(repeats = batch_size, dim=0)

    return bid_profile #bid_eval_size*batch, 1,n_items

def ex_post_util_loss(mechanism: Mechanism, bidder_valuations: torch.Tensor, bid_profile: torch.Tensor, bidder: Bidder,
                      grid: torch.Tensor, half_precision = False):
    r"""Estimates a bidder's ex post util_loss in the current bid_profile vs a potential grid,
    i.e. the potential benefit of having deviated from the current strategy, as:

    .. math::
        \texttt{util_loss} = max(0, BR(v_i, b_-i) - u_i(b_i, b_-i))

    Args:
        mechanism: mechanism whose ``play`` maps bid profiles to
            ``(allocation, payments)``.
        bidder_valuations: the valuations of the bidder that is to be
            evaluated, with batch entries along the first dimension.
        bid_profile: (batch_size x n_player x n_items)
        bidder: a Bidder; its ``player_position`` selects the evaluated
            player and its ``get_utility`` computes the utilities.
        grid: Option 1: 1d tensor with length grid_size. For ``n_items > 1``
            it is expanded via ``torch.combinations(..., with_replacement=True)``.
            NOTE(review): that yields order-insensitive combinations, i.e.
            fewer than ``grid_size**n_items`` rows; if the full cartesian
            product is intended, this should be replaced (e.g. by
            ``torch.cartesian_prod``) - confirm.
            Option 2: tensor with shape (grid_size, n_items)
        half_precision: (optional, bool) Whether to use half precision tensors. default: false

    Returns:
        util_loss (batch_size)

    Useful: To get the memory used by a tensor (in MB): ``(tensor.element_size() *
    tensor.nelement())/(1024*1024)``
    """

    player_position = bidder.player_position

    ## Use smaller dtypes to save memory
    if half_precision:
        bid_profile = bid_profile.half()
        bidder_valuations = bidder_valuations.half()
        grid = grid.half()

    batch_size, _, n_items = bid_profile.shape
    # the grid is written into the bid profiles, so it must live on the same
    # device no matter in which of the supported layouts it was passed
    grid = grid.to(bid_profile.device)

    # Create multidimensional bid tensor if required
    if n_items == 1:
        grid = grid.view(grid.shape[0], 1)
    elif n_items >= 2 and len(grid.shape) == 1:
        grid = torch.combinations(grid, r=n_items, with_replacement=True)
        # Stefan: this only works if both bids are over the same action space (what if one of these is the bid for a bundle?)
    # this grid size refers to all grid rows, not to the (possibly 1D) input
    grid_size, _ = grid.shape

    ### Evaluate alternative bids on grid
    # (grid_size*batch_size) x n_players x n_items, ordered grid-major:
    # row g*batch_size + b is batch entry b with grid point g
    grid_bid_profile = _create_grid_bid_profiles(player_position, grid, bid_profile)
    ## Calculate allocation and payments for alternative bids given opponents bids
    allocation, payments = mechanism.play(grid_bid_profile)

    # we only need the specific player's allocation and can get rid of the rest.
    a_i = allocation[:,player_position,:]
    p_i = payments[:,player_position] # 1D tensor of length (grid * batch)

    # Tile the valuations in the same grid-major order as the bid profiles:
    # the whole batch is repeated once per grid point. (repeat_interleave
    # would give batch-major order and pair bids with the wrong valuations.)
    tiled_valuations = bidder_valuations.repeat(
        grid_size, *([1] * (bidder_valuations.dim() - 1)))

    utility_grid = bidder.get_utility(a_i, p_i, tiled_valuations).view(grid_size, batch_size)
    best_response_utility, _ = utility_grid.max(0)

    ## Evaluate actual bids
    allocation, payments = mechanism.play(bid_profile)
    a_i = allocation[:,player_position,:]
    p_i = payments[:,player_position]

    actual_utility = bidder.get_utility(a_i, p_i, bidder_valuations)

    # set 0 if actual bid is best (no difference in limit, but may occur if
    # the grid is too sparse)
    return (best_response_utility - actual_utility).relu()

def ex_interim_util_loss(env: AuctionEnvironment, player_position: int,
                         agent_observations: torch.Tensor,
                         grid_size: int,
                         opponent_batch_size: int = None,
                         grid_best_response: bool = False, mute: bool = False):
    r"""Estimates a bidder's utility loss in the current state of the
    environment, i.e. the potential benefit of deviating from the current
    strategy, evaluated at each point of the agent_observations. Therefore, we
    calculate

    .. math::
        \max_{v_i \in V_i} \max_{b_i^* \in A_i} E_{v_{-i}|v_i} [u(v_i, b_i^*, b_{-i}(v_{-i})) - u(v_i, b_i, b_{-i}(v_{-i}))]

    We're conditioning on the agent's observation at `player_position`. That
    means, types and observations of other players as well as its own type have
    to be conditioned. As it's conditioned on the observation, the agent's
    action stays the same.

    Args:
        env: bnelearn.Environment.
        player_position: int, position of the player in the environment.
        agent_observations: torch.Tensor of shape
            (batch_size, observation_size), the observations at which the
            loss is evaluated. A detached copy is used internally.
        grid_size: int, passed as `minimum_number_of_points` to
            `env.sampler.generate_action_grid`, which provides the
            alternative actions.
        opponent_batch_size: int, specifying the sample size for opponents.
            Falls back to the agent batch size if not given.
        grid_best_response: bool, whether or not the BRs live on the grid or
            possibly come from the actual actions (in case no better response
            was found on grid).
        mute: bool, forwarded to `apply_with_dynamic_mini_batching`.

    Returns:
        utility_loss (torch.Tensor, shape: [batch_size]):  the computed
            approximate utility loss for each input observation.
        best_response (torch.Tensor, shape: [batch_size, action_size]):
            the best response found for each input observation (This is
            either a grid point, or the actual action according to the player's
            strategy.)

    Remarks:
        Relies on `generate_action_grid` of `env.sampler` and, through
        `ex_interim_utility`, on `env.draw_conditionals`.
    """

    device = agent_observations.device
    agent: Bidder = env.agents[player_position]
    # ensure we are not propagating any gradients (may cause memory leaks)
    agent_observations = agent_observations.detach().clone()

    agent_batch_size, _ = agent_observations.shape
    opponent_batch_size = opponent_batch_size or agent_batch_size

    ####### get actual utility #############################
    agent_action_actual = agent.get_action(agent_observations)
    utility_actual = ex_interim_utility(
        env, player_position, agent_observations, agent_action_actual,
        opponent_batch_size, device)

    ####### get best responses over grid of alternative actions #######
    action_alternatives = env.sampler.generate_action_grid(
        player_position=player_position,
        minimum_number_of_points=grid_size,
        dtype=agent_action_actual.dtype, device=agent_action_actual.device
    )
    action_size = action_alternatives.shape[-1]

    def get_br_utility_and_index(obs):
        # evaluated per mini batch of observations
        return _get_best_responses_among_alternatives(
            env, player_position, obs, action_alternatives, opponent_batch_size)

    br_utility, br_indices = apply_with_dynamic_mini_batching(
        function=get_br_utility_and_index,
        args=agent_observations, mute=mute)

    ##### calculate the loss and return best responses ###########
    utility_loss = (br_utility - utility_actual).relu_()

    if grid_best_response:
        # BR only on grid
        br_actions = action_alternatives[br_indices]
    else:
        # where no grid point beat the actual action (loss == 0), report the
        # actual action as best response instead of the grid point
        actual_was_best = (utility_loss == 0).unsqueeze_(1).repeat(1, action_size)
        br_actions = actual_was_best * agent_action_actual + \
                 actual_was_best.logical_not() * action_alternatives[br_indices]

    return (utility_loss, br_actions)

def _get_best_responses_among_alternatives(
        env: AuctionEnvironment, player_position: int,
        agent_observations: torch.Tensor, action_alternatives: torch.Tensor,
        opponent_batch_size: int) -> Tuple[torch.Tensor, torch.IntTensor]:
    """Picks, for every observation in the batch, the alternative action with
    the highest ex-interim utility for the given player.

    Every alternative is paired with every observation, the pairs are scored
    via `ex_interim_utility`, and the maximum over the alternatives is taken.

    Returns:
        br_utility (tensor of size [agent_batch_size]): utility of the best
            alternative per observation
        br_indices (tensor of size [agent_batch_size]): row index of that
            alternative in action_alternatives
    """
    n_alternatives, action_size = action_alternatives.shape
    n_observations, _ = agent_observations.shape

    # n_alternatives x n_observations x action_size:
    # each alternative is held constant along the observation axis
    stacked_actions = action_alternatives.view(n_alternatives, 1, action_size)
    stacked_actions = stacked_actions.repeat([1, n_observations, 1])

    # n_alternatives x n_observations x observation_size:
    # the full observation batch is copied once per alternative
    stacked_observations = agent_observations.repeat([n_alternatives, 1, 1])

    # n_alternatives x n_observations
    utilities = ex_interim_utility(
        env, player_position, stacked_observations, stacked_actions,
        opponent_batch_size, env.mechanism.device)

    # reduce over the alternatives; both results have shape [n_observations]
    best = torch.max(utilities, dim=0)
    return best.values, best.indices

def ex_interim_utility(
        env: AuctionEnvironment, player_position: int,
        agent_observations: torch.Tensor, agent_actions: torch.Tensor,
        opponent_batch_size: int, device) -> torch.Tensor:
    """
    Calculates the ex-interim utility of a given agent in the environment,
    given (batches of) their observations and actions.

    Can handle multiple batch dimensions for the agent.

    Args:
        env (AuctionEnvironment): The environment from which conditional type
            profiles and opponent actions will be sampled.
        player_position (int): the position of the agent to be evaluated
        agent_observations (Tensor of dim (*agent_batch_sizes x observation_size))
        agent_actions      (Tensor of dim (*agent_batch_sizes x action_size))
        opponent_batch_size (int): how many conditional valuations and opponent
            observations to sample for each agent_batch entry. The expected
            ex-interim utility will then be approximated by the sample mean
            over the opponent_batch_size dimension.
        device (device):    The output device.

    Returns:
        utility: (Tensor of dim (*agent_batch_sizes)): the resulting empirical
            ex-interim utilities.

    Raises:
        AssertionError: if the leading (batch) dimensions of
            `agent_observations` and `agent_actions` differ.
    """
    mechanism = env.mechanism
    agent = env.agents[player_position]

    *agent_batch_sizes, action_size = agent_actions.shape
    n_batch_dims = len(agent_batch_sizes)
    assert agent_observations.shape[:n_batch_dims] == torch.Size(agent_batch_sizes), \
        """observations and actions must have the same batch sizes!"""

    # draw conditional valuations (cv) and observations (co) conditioned on
    # `agent`'s observation:
    # co has dimension (*agent_batches , opponent_batch, n_players, observation_size)
    # each agent_observations is repeated opponent_batch_size times
    cv, co = env.draw_conditionals(
        player_position, agent_observations, opponent_batch_size, device
        )

    action_profile_actual = torch.zeros(
        *agent_batch_sizes, opponent_batch_size, env.n_players, action_size,
        dtype=agent_actions.dtype, device=device
        )

    # the agent plays the same action against every sampled opponent profile:
    # add a singleton opponent-batch dimension and let the assignment
    # broadcast it over the opponent_batch_size entries
    action_profile_actual[..., player_position, :] = agent_actions.unsqueeze(-2)

    # opponents act according to their strategies on their sampled observations
    for a in env.agents:
        if a.player_position != player_position:
            action_profile_actual[..., a.player_position, :] = \
                a.strategy.play(co[..., a.player_position, :])

    # shapes: allocations: *agent_batches x opponent_batch x n_players x n_items
    #         payments:    *agent_batches x opponent_batch x n_players
    allocations, payments = mechanism.play(action_profile_actual)

    agent_allocations = allocations[..., player_position, :]
    if not isinstance(mechanism, (TullockContest, CrowdsourcingContest)):
        # for all mechanisms other than the contests, allocations are cast
        # to booleans before computing utilities
        agent_allocations = agent_allocations.type(torch.bool)
    agent_payments = payments[..., player_position]
    agent_valuations = cv[..., player_position, :]
    # shape of utility: *agent_batch_sizes x opponent_batch_size
    utility = agent.get_utility(
        agent_allocations, agent_payments, agent_valuations
        )

    # expectation over opponent batches -> shape: *agent_batch_sizes
    return torch.mean(utility, dim=-1)