Source code for bnelearn.bidder

# -*- coding: utf-8 -*-
"""Bidder module

This module implements players / bidders / agents in games.

"""

from abc import ABC, abstractmethod
import warnings
import torch
from bnelearn.strategy import (Strategy, MatrixGameStrategy,
                               FictitiousPlayStrategy, FictitiousNeuralPlayStrategy)

class Player(ABC):
    """Abstract participant of a game.

    A player is determined by her

        - strategy
        - utility function over outcomes

    Concrete subclasses have to implement `get_action` and `get_utility`.
    """

    def __init__(self, strategy, player_position=None, batch_size=1, cuda=True):
        # The GPU is used only if it was requested and torch reports one.
        use_gpu = cuda and torch.cuda.is_available()

        self.strategy = strategy
        self.batch_size = batch_size
        self.player_position: int = player_position  # None in dynamic environments!
        self.cuda = use_gpu
        self.device = 'cuda' if use_gpu else 'cpu'

    @abstractmethod
    def get_action(self):
        """Chooses an action according to the player's strategy."""

    @abstractmethod
    def get_utility(self, *args, **kwargs):
        """Calculates player's utility based on outcome of a game."""
class MatrixGamePlayer(Player):
    """A player taking part in a matrix game."""

    def __init__(self, strategy, player_position=None, batch_size=1, cuda=True):
        super().__init__(
            strategy,
            player_position=player_position,
            batch_size=batch_size,
            cuda=cuda,
        )

    def get_utility(self, *outcome):  # pylint: disable=arguments-differ
        """Return the player's utility for a batch of outcomes.

        `outcome` must unpack into exactly two elements; for now these are
        `(allocation, payment)`. The utility is the negated payment.
        """
        _allocations, payments = outcome
        return -payments

    def get_action(self):
        """Query the strategy for an action.

        `MatrixGameStrategy` and `FictitiousNeuralPlayStrategy` are played
        with this player's `batch_size`, `FictitiousPlayStrategy` is played
        with this player's `player_position`.

        Raises:
            ValueError: if the strategy is of none of the types above.
        """
        strategy = self.strategy
        batch_played = (MatrixGameStrategy, FictitiousNeuralPlayStrategy)

        if isinstance(strategy, batch_played):
            return strategy.play(batch_size=self.batch_size)
        if isinstance(strategy, FictitiousPlayStrategy):
            return strategy.play(self.player_position)

        raise ValueError("Invalid Strategy Type for Matrix game: {}".format(type(self.strategy)))
class Bidder(Player):
    """A player in an auction game.

    Has a distribution over valuations/types that is common knowledge. These
    valuations correspond to the `n_items` available.

    Attributes:
        batch_size: corresponds to the number of individual auctions.
        valuation_size: size of the last dimension of the cached valuations.
        observation_size: size of the last dimension of the cached
            observations.
        bid_size: size of the last dimension of the cached actions.
        risk: exponent applied to the magnitude of the payoff in
            `get_utility`; `1.0` returns the payoff unchanged.
        enable_action_caching: determines whether actions should be cached and
            retrieved from memory, rather than recomputed as long as
            observations haven't changed.
        TODO ...
    """

    def __init__(self,
                 strategy: Strategy,
                 player_position: int = None,
                 batch_size: int = 1,
                 valuation_size: int = 1,
                 observation_size: int = 1,
                 bid_size: int = 1,
                 cuda: bool = True,
                 enable_action_caching: bool = False,
                 risk: float = 1.0
                 ):
        super().__init__(strategy, player_position, batch_size, cuda)

        self.valuation_size = valuation_size
        self.observation_size = observation_size
        self.bid_size = bid_size
        self.risk = risk

        self._enable_action_caching = enable_action_caching
        # true if new observations drawn since actions calculated
        self._cached_observations_changed = False
        self._cached_observations = None
        # true if new valuations drawn since actions calculated
        self._cached_valuations_changed = False
        self._cached_valuations = None

        # Buffers are only pre-allocated when action caching is requested;
        # otherwise the caches stay `None` until they are set explicitly.
        if self._enable_action_caching:
            self._cached_valuations = torch.zeros(batch_size, valuation_size, device=self.device)
            self._cached_observations = torch.zeros(batch_size, observation_size, device=self.device)
            self._cached_actions = torch.zeros(batch_size, bid_size, device=self.device)

    def _updated_cache(self, new_value: torch.Tensor, cached, name: str):
        """Work out what to store when `new_value` replaces the `cached` tensor.

        Args:
            new_value: the tensor the caller wants to cache.
            cached: the tensor currently cached, or `None` if there is none.
            name: `'observations'` or `'valuations'`, used in warnings only.

        Returns:
            A tuple `(tensor, changed)`. `changed` is `False` iff `new_value`
            equals the existing cache, in which case nothing needs storing.
        """
        if cached is None:
            # Nothing to compare against (e.g. action caching is disabled and
            # no value was set before): accept the value on the bidder's device.
            return new_value.to(self.device), True

        if new_value.shape != cached.shape:
            warnings.warn(
                "New {} have different shape than specified in Bidder object!".format(name))

        if (new_value.dtype, new_value.device) != (cached.dtype, cached.device):
            warnings.warn(
                "New {} have different dtype and/or device than bidder. Converting to {},{}".format(
                    name, cached.device, cached.dtype)
                )
            # Convert *before* comparing, so the comparison below always sees
            # two tensors of the same dtype on the same device.
            new_value = new_value.to(cached.device, cached.dtype)

        if new_value.equal(cached):
            return cached, False
        return new_value, True

    @property
    def cached_observations(self):
        """The currently cached observations (`None` if none are cached)."""
        return self._cached_observations

    @cached_observations.setter
    def cached_observations(self, new_value: torch.Tensor):
        """When manually setting observations, make sure that the
        _cached_observations_changed flag is set correctly."""
        tensor, changed = self._updated_cache(
            new_value, self._cached_observations, 'observations')
        if changed:
            self._cached_observations = tensor
            self._cached_observations_changed = True

    @property
    def cached_valuations(self):
        """The currently cached valuations (`None` if none are cached)."""
        return self._cached_valuations

    @cached_valuations.setter
    def cached_valuations(self, new_value: torch.Tensor):
        """When manually setting valuations, make sure that the
        _cached_valuations_changed flag is set correctly."""
        tensor, changed = self._updated_cache(
            new_value, self._cached_valuations, 'valuations')
        if changed:
            self._cached_valuations = tensor
            self._cached_valuations_changed = True

    def get_utility(self, allocations, payments, valuations=None):
        """
        For a batch of valuations, allocations, and payments of the bidder,
        return their utility.

        Can handle multiple batch dimensions, e.g. for allocations a shape of
        (outer_batch_size, inner_batch_size, n_items). These batch dimensions
        are kept in returned payoff.

        If `valuations` is None, the cached valuations are used.
        """
        if valuations is None:
            valuations = self._cached_valuations

        welfare = self.get_welfare(allocations, valuations)
        payoff = welfare - payments

        if self.risk == 1.0:
            return payoff

        # payoff^alpha not well defined in negative domain for risk averse agents
        # the following is a memory-saving implementation of
        #     return payoff.relu()**self.risk - (-payoff).relu()**self.risk
        # The in-place ops only touch `payoff` and the temporaries derived
        # from it, all of which were created inside this method.
        return payoff.relu().pow_(self.risk).sub_(payoff.neg_().relu_().pow_(self.risk))

    def get_welfare(self, allocations, valuations=None):
        """For a batch of allocations return the player's welfare.

        If valuations are not specified, welfare is calculated for the cached
        valuations.

        Can handle multiple batch dimensions, e.g. for valuations a shape of
        (..., batch_size, n_items). These batch dimensions are kept in returned
        welfare.
        """
        assert allocations.dim() >= 2  # [batch_sizes] x items
        if valuations is None:
            valuations = self._cached_valuations

        # The item dimension is the last dimension of the valuations.
        item_dimension = valuations.dim() - 1
        welfare = (valuations * allocations).sum(dim=item_dimension)

        return welfare

    def get_action(self, observations=None, deterministic: bool = False):
        """Calculate action from given observations, or retrieve from cache"""

        # Cache hit: caching is on, the cached observations are unchanged
        # since the actions were computed, and the caller did not pass
        # different observations.
        if self._enable_action_caching and not self._cached_observations_changed and \
            (observations is None or torch.equal(observations, self._cached_observations)):
            return self._cached_actions

        if observations is None:
            assert self._enable_action_caching, \
                "Action caching is disabled but no observation argument was provided to get_actions."
            # No observations have been given, but _cached_observations_changed
            # use cached observations but recompute actions
            observations = self._cached_observations

        #TODO: there was a reshaping here added by Nils (to self.batch_size, -1). This is problematic, should be done
        # in strategy, not here. Strategy should always map complete obs to complete actions.
        inputs = observations

        # for cases when n_observations != input_length (e.g. Split-Award Auctions, combinatorial auctions with bid languages)
        # TODO: generalize this, see #82. https://gitlab.lrz.de/heidekrueger/bnelearn/issues/82
        if hasattr(self.strategy, 'input_length') and self.strategy.input_length != self.observation_size:
            warnings.warn("Strategy expects shorter input_length than n_items. Truncating observations...")
            dim = self.strategy.input_length
            inputs = inputs[..., :dim]

        actions = self.strategy.play(inputs, deterministic=deterministic)

        if self._enable_action_caching:
            self.cached_observations = observations
            self._cached_actions = actions
            # we have updated the cached actions, so we can disable the
            # flag that they need to be recomputed.
            self._cached_observations_changed = False

        return actions
class ReverseBidder(Bidder):
    r"""Bidder that has reversed utility :math:`\cdot (-1)` as valuations
    correspond to their costs and payments to what they get paid.

    The docstring is a raw string so that the LaTeX backslash is not read as
    a (nonexistent) string escape sequence.
    """

    def __init__(self, efficiency_parameter=None, **kwargs):
        """Store `efficiency_parameter` and forward all other keyword
        arguments to `Bidder.__init__`."""
        self.efficiency_parameter = efficiency_parameter
        super().__init__(**kwargs)

    def get_utility(self, allocations, payments, valuations=None):
        """For reverse bidders, returns are inverted: this is the negated
        utility of a regular `Bidder` for the same arguments."""
        return - super().get_utility(allocations, payments, valuations)
class CombinatorialBidder(Bidder):
    """Bidder in combinatorial auctions.

    Note: Currently only set up for full LLG setting.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        strategy = self.strategy
        if not hasattr(strategy, 'input_length'):
            # `ClosureStrategy` doesn't have `input_length`, so fall back to
            # the sizes configured on the bidder itself.
            self.input_length = self.valuation_size
            self.output_length = self.bid_size
        else:
            self.input_length = strategy.input_length
            self.output_length = strategy.output_length

    def get_welfare(self, allocations, valuations: torch.Tensor = None) -> torch.Tensor:
        """Return the welfare of this bidder for a batch of allocations.

        The valuations are first scattered into a zero tensor shaped like
        `allocations`: the player at position 2 only fills item column 2,
        every other player fills her own column and column 2. The result is
        the sum over the item dimension of this tensor times `allocations`.

        If `valuations` is None, the cached valuations are used.
        """
        assert allocations.dim() >= 2  # *batch_sizes x items
        if valuations is None:
            valuations = self._cached_valuations

        position = self.player_position
        item_columns = [2] if position == 2 else [position, 2]

        padded = torch.zeros_like(allocations, dtype=torch.float)
        padded[..., item_columns] = valuations

        return torch.sum(padded * allocations, dim=valuations.dim() - 1)
class Contestant(Bidder):
    """A `Bidder` whose utility is a winning probability minus the product
    of her valuations and payments (see `get_utility` and `get_welfare`)."""

    def get_utility(self, winning_probabilities, payments, valuations=None):
        """
        For a batch of valuations, winning probabilities, and payments of the
        contestant, return their utility.

        The utility is `winning_probabilities` minus the value returned by
        `get_welfare(payments, valuations)`. If `valuations` is None, the
        cached valuations are used.

        Note that the result is passed through `squeeze()`, which removes
        *all* dimensions of size one.
        """
        if valuations is None:
            valuations = self._cached_valuations

        welfare = self.get_welfare(payments, valuations)

        # Any shape/device mismatch surfaces here as the original torch error
        # instead of being swallowed and failing later on an unbound name.
        payoff = winning_probabilities - welfare.unsqueeze(-1)

        return payoff.squeeze()

    def get_welfare(self, payments, valuations=None):
        """For a batch of payments return the product of valuations and
        payments, summed over the last dimension of the valuations.

        If valuations are not specified, the cached valuations are used.

        `payments` gets a trailing dimension appended before multiplying, so
        it is broadcast against the last dimension of `valuations`.
        """
        #assert payments.dim() >= 2 # [batch_sizes] x items
        if valuations is None:
            valuations = self._cached_valuations

        item_dimension = valuations.dim() - 1
        welfare = (valuations * payments.unsqueeze(-1)).sum(dim=item_dimension)

        return welfare
class CrowdsourcingContestant(Bidder):
    """A `Bidder` in a crowdsourcing contest.

    Attributes:
        crowdsourcing_values: tensor holding one value per class; its first
            dimension defines `num_classes`.
        num_classes: number of classes used for the one-hot encoding of the
            allocations in `get_utility`.
        value_contest: selects which term of the utility is scaled by the
            contestant's ability (see `get_utility`).
    """

    def __init__(self,
                 strategy: Strategy,
                 player_position: int,
                 batch_size: int,
                 enable_action_caching: bool = False,
                 crowdsourcing_values: torch.Tensor = None,
                 value_contest: bool = True):
        # `crowdsourcing_values` is indexed via `.shape[0]` below, so it must
        # be array-like. Fail early with a clear message instead of an
        # AttributeError on a placeholder value.
        if not hasattr(crowdsourcing_values, 'shape'):
            raise TypeError(
                "crowdsourcing_values must be a tensor with one entry per class, got {}".format(
                    type(crowdsourcing_values)))

        super().__init__(strategy, player_position, batch_size,
                         enable_action_caching=enable_action_caching)

        self.crowdsourcing_values = crowdsourcing_values
        self.num_classes = self.crowdsourcing_values.shape[0]
        self.value_contest = value_contest

    def get_utility(self, allocations, payments, ability=None):
        """
        For a batch of abilities, allocations, and payments of the
        contestant, return their utility.

        `allocations` is one-hot encoded over `num_classes` classes and
        reduced to the matching entry of `crowdsourcing_values`.

        * If `value_contest` is set, that value is multiplied by `ability`
          and the payments are subtracted from it.
        * Otherwise, the payments multiplied by `ability` are subtracted
          from the unscaled value.

        If `ability` is None, the cached valuations are used. The last
        dimension of the result is squeezed.
        """
        if ability is None:
            ability = self._cached_valuations

        # retrieve valuations: one-hot encode the allocated class and pick
        # the corresponding crowdsourcing value.
        allocations = torch.nn.functional.one_hot(allocations.long(), self.num_classes)
        allocations = (allocations * self.crowdsourcing_values).sum(-1)

        if self.value_contest:
            allocations = allocations * ability
            disutil = payments.unsqueeze(-1)
        else:
            # disutility
            disutil = ability * payments.unsqueeze(-1)

        payoff = allocations - disutil

        return payoff.squeeze(-1)