# -*- coding: utf-8 -*-
"""Bidder module
This module implements players / bidders / agents in games.
"""
from abc import ABC, abstractmethod
import warnings
import torch
from bnelearn.strategy import (Strategy, MatrixGameStrategy,
FictitiousPlayStrategy, FictitiousNeuralPlayStrategy)
class Player(ABC):
    """
    Abstract base class for a participant of a game.

    A player is characterised by
        - the strategy she follows, and
        - a utility function over game outcomes.

    Subclasses have to implement ``get_action`` and ``get_utility``.
    """

    def __init__(self, strategy, player_position=None, batch_size=1, cuda=True):
        # CUDA is only used when it was requested AND is actually available.
        self.cuda = cuda and torch.cuda.is_available()
        if self.cuda:
            self.device = 'cuda'
        else:
            self.device = 'cpu'

        self.strategy = strategy
        self.batch_size = batch_size
        # None in dynamic environments!
        self.player_position: int = player_position

    @abstractmethod
    def get_action(self):
        """Chooses an action according to the player's strategy."""

    @abstractmethod
    def get_utility(self, *args, **kwargs):
        """Calculates player's utility based on outcome of a game."""
class MatrixGamePlayer(Player):
    """Player taking part in a matrix game."""

    def __init__(self, strategy, player_position=None, batch_size=1, cuda=True):
        super().__init__(
            strategy,
            player_position=player_position,
            batch_size=batch_size,
            cuda=cuda,
        )

    def get_utility(self, *outcome):  # pylint: disable=arguments-differ
        """Return the player's utility for a batch of outcomes.

        ``outcome`` must unpack into exactly two elements
        ``(allocation, payment)``; the utility is the negated payment.
        """
        _unused_allocation, payments = outcome
        return -payments

    def get_action(self):
        """Query the strategy for an action.

        The arguments passed to ``play`` depend on the strategy type;
        a ``ValueError`` is raised for unsupported strategy types.
        """
        strategy = self.strategy

        # These strategy types are sampled batch-wise.
        if isinstance(strategy, (MatrixGameStrategy, FictitiousNeuralPlayStrategy)):
            return strategy.play(batch_size=self.batch_size)

        # Fictitious play is queried with the position of this player.
        if isinstance(strategy, FictitiousPlayStrategy):
            return strategy.play(self.player_position)

        raise ValueError(f"Invalid Strategy Type for Matrix game: {type(strategy)}")
class Bidder(Player):
    """A player in an auction game. Has a distribution over valuations/types
    that is common knowledge. These valuations correspond to the `n_items`
    available.

    Attributes:
        batch_size: corresponds to the number of individual auctions.
        descending_valuations: if is true, the valuations will be returned
            in decreasing order. (NOTE(review): not set anywhere in the code
            of this class - confirm whether this entry is still current.)
        enable_action_caching: determines whether actions should be cached and
            retrieved from memory, rather than recomputed as long as
            observations haven't changed.
        risk: exponent applied to the payoff in `get_utility`; a value of 1.0
            leaves the payoff unchanged.
    """

    def __init__(self,
                 strategy: Strategy,
                 player_position: int = None,
                 batch_size: int = 1,
                 valuation_size: int = 1,
                 observation_size: int = 1,
                 bid_size: int = 1,
                 cuda: bool = True,
                 enable_action_caching: bool = False,
                 risk: float = 1.0
                 ):
        super().__init__(strategy, player_position, batch_size, cuda)

        self.valuation_size = valuation_size
        self.observation_size = observation_size
        self.bid_size = bid_size
        self.risk = risk

        self._enable_action_caching = enable_action_caching
        # true if new observations drawn since actions calculated
        self._cached_observations_changed = False
        self._cached_observations = None
        # true if new valuations drawn since they were last cached
        self._cached_valuations_changed = False
        self._cached_valuations = None
        self._cached_actions = None

        if self._enable_action_caching:
            # Pre-allocate the caches on the bidder's device.
            self._cached_valuations = torch.zeros(batch_size, valuation_size, device=self.device)
            self._cached_observations = torch.zeros(batch_size, observation_size, device=self.device)
            self._cached_actions = torch.zeros(batch_size, bid_size, device=self.device)

    @staticmethod
    def _reconcile_with_cache(new_value: torch.Tensor, cached, label: str):
        """Prepare a manually supplied tensor for storage in a cache slot.

        Args:
            new_value: the tensor the caller wants to store.
            cached: the tensor currently held in the cache, or None.
            label: 'observations' or 'valuations', used in warning messages.

        Returns:
            A tuple `(tensor, changed)`. `changed` is False iff `new_value`
            equals the cached tensor, in which case nothing has to be stored.
        """
        if cached is None:
            # Nothing has been cached so far (e.g. action caching is
            # disabled): there is no reference shape/dtype/device to check.
            return new_value, True

        if new_value.shape != cached.shape:
            warnings.warn("New {} have different shape than specified in Bidder object!".format(label))

        if (new_value.dtype, new_value.device) != (cached.dtype, cached.device):
            warnings.warn(
                "New {} have different dtype and/or device than bidder. Converting to {},{}".format(
                    label, cached.device, cached.dtype)
            )
            # Convert BEFORE comparing, so the comparison below operates on
            # tensors living on the same device with the same dtype.
            new_value = new_value.to(cached.device, cached.dtype)

        if new_value.equal(cached):
            return cached, False
        return new_value, True

    @property
    def cached_observations(self):
        """The currently cached observations (None if nothing is cached)."""
        return self._cached_observations

    @cached_observations.setter
    def cached_observations(self, new_value: torch.Tensor):
        """When manually setting observations, make sure that the _observations_changed flag is set correctly."""
        tensor, changed = self._reconcile_with_cache(new_value, self._cached_observations, "observations")
        if changed:
            self._cached_observations = tensor
            self._cached_observations_changed = True

    @property
    def cached_valuations(self):
        """The currently cached valuations (None if nothing is cached)."""
        return self._cached_valuations

    @cached_valuations.setter
    def cached_valuations(self, new_value: torch.Tensor):
        """When manually setting valuations, make sure that the _valuations_changed flag is set correctly."""
        tensor, changed = self._reconcile_with_cache(new_value, self._cached_valuations, "valuations")
        if changed:
            self._cached_valuations = tensor
            self._cached_valuations_changed = True

    def get_utility(self, allocations, payments, valuations=None):
        """
        For a batch of valuations, allocations, and payments of the bidder,
        return their utility.

        Can handle multiple batch dimensions, e.g. for allocations a shape of
        (outer_batch_size, inner_batch_size, n_items). These batch dimensions
        are kept in returned payoff.

        If `valuations` is None, the cached valuations are used.
        """
        if valuations is None:
            valuations = self._cached_valuations

        welfare = self.get_welfare(allocations, valuations)
        payoff = welfare - payments

        if self.risk == 1.0:
            return payoff

        # payoff^alpha not well defined in negative domain for risk averse agents
        # the following is a memory-saving implementation of
        #   payoff.relu()**self.risk - (-payoff).relu()**self.risk
        # The in-place ops only touch `payoff` and tensors derived from it;
        # `payoff` itself was freshly created by the subtraction above.
        return payoff.relu().pow_(self.risk).sub_(payoff.neg_().relu_().pow_(self.risk))

    def get_welfare(self, allocations, valuations=None):
        """For a batch of allocations return the player's welfare.

        If valuations are not specified, welfare is calculated for
        the cached valuations.

        Can handle multiple batch dimensions, e.g. for valuations a shape of
        (..., batch_size, n_items). These batch dimensions are kept in returned
        welfare.
        """
        assert allocations.dim() >= 2  # [batch_sizes] x items
        if valuations is None:
            valuations = self._cached_valuations

        # Items are the LAST axis of the (broadcast) product. Summing over
        # `valuations.dim() - 1` would hit a batch axis whenever allocations
        # carry more leading batch dimensions than valuations.
        welfare = (valuations * allocations).sum(dim=-1)

        return welfare

    def get_action(self, observations=None, deterministic: bool = False):
        """Calculate action from given observations, or retrieve from cache.

        Args:
            observations: observations to act on. May be None only if action
                caching is enabled; then the cached observations are used.
            deterministic: forwarded to `strategy.play`.

        Returns:
            The actions returned by the strategy, or the cached actions if
            caching is enabled and the observations have not changed.

        Raises:
            AssertionError: if `observations` is None while action caching is
                disabled.
        """
        if self._enable_action_caching and not self._cached_observations_changed and \
                (observations is None or torch.equal(observations, self._cached_observations)):
            return self._cached_actions

        if observations is None:
            if not self._enable_action_caching:
                # Raised explicitly (rather than via `assert`) so the check
                # also holds when Python runs with -O.
                raise AssertionError(
                    "Action caching is disabled but no observation argument was provided to get_actions.")
            # No observations have been given, but _cached_observations_changed
            # use cached observations but recompute actions
            observations = self._cached_observations

        # TODO: there was a reshaping here added by Nils (to self.batch_size, -1). This is problematic, should be done
        #       in strategy, not here. Strategy should always map complete obs to complete actions.
        inputs = observations

        # for cases when n_observations != input_length (e.g. Split-Award Auctions, combinatorial auctions with bid languages)
        # TODO: generalize this, see #82. https://gitlab.lrz.de/heidekrueger/bnelearn/issues/82
        if hasattr(self.strategy, 'input_length') and self.strategy.input_length != self.observation_size:
            warnings.warn("Strategy expects shorter input_length than n_items. Truncating observations...")
            dim = self.strategy.input_length
            inputs = inputs[..., :dim]

        actions = self.strategy.play(inputs, deterministic=deterministic)

        if self._enable_action_caching:
            self.cached_observations = observations
            self._cached_actions = actions
            # we have updated the cached actions, so we can disable the
            # flag that they need to be recomputed.
            self._cached_observations_changed = False

        return actions
class ReverseBidder(Bidder):
    r"""Bidder that has reversed utility :math:`\cdot (-1)` as valuations correspond to
    their costs and payments to what they get paid.

    The docstring is a raw string so that the backslash in the math role is
    not treated as an (invalid) escape sequence.
    """

    def __init__(self, efficiency_parameter=None, **kwargs):
        # Stored as given; all remaining keyword arguments go to `Bidder`.
        self.efficiency_parameter = efficiency_parameter
        super().__init__(**kwargs)

    def get_utility(self, allocations, payments, valuations=None):
        """For reverse bidders, returns are inverted.

        Returns the negated result of `Bidder.get_utility` for the same
        arguments.
        """
        return -super().get_utility(allocations, payments, valuations)
class CombinatorialBidder(Bidder):
    """Bidder in combinatorial auctions.

    Note: Currently only set up for full LLG setting.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        if hasattr(self.strategy, 'input_length'):  # `ClosureStrategy` doesn't have `input_length`
            self.input_length = self.strategy.input_length
            self.output_length = self.strategy.output_length
        else:
            # Fall back to the sizes configured on the bidder itself.
            self.input_length = self.valuation_size
            self.output_length = self.bid_size

    def get_welfare(self, allocations, valuations: torch.Tensor = None) -> torch.Tensor:
        """For a batch of allocations return the player's welfare.

        The valuations are scattered into a zero tensor shaped like
        `allocations`: the player at position 2 values item index 2 only,
        any other player values the item at her own position and item
        index 2. The welfare is the sum over the item (last) axis of
        valuation times allocation.

        If `valuations` is None, the cached valuations are used.
        """
        assert allocations.dim() >= 2  # *batch_sizes x items
        if valuations is None:
            valuations = self._cached_valuations

        valuations_extended = torch.zeros_like(allocations, dtype=torch.float)
        if self.player_position == 2:
            # presumably the global bidder of the LLG setting - TODO confirm
            valuations_extended[..., [2]] = valuations
        else:
            valuations_extended[..., [self.player_position, 2]] = valuations

        # The product has the shape of `allocations`, whose last axis holds
        # the items. Summing over `valuations.dim() - 1` would reduce a batch
        # axis if valuations have fewer leading dimensions than allocations.
        welfare = (valuations_extended * allocations).sum(dim=-1)

        return welfare
class Contestant(Bidder):
    """Bidder variant whose utility is the winning probability minus the
    product of valuations and payments, as computed by `get_welfare`."""

    def get_utility(self, winning_probabilities, payments, valuations=None):
        """
        For a batch of valuations, winning probabilities, and payments of the
        contestant, return their utility.

        Can handle multiple batch dimensions, e.g. for allocations a shape of
        (outer_batch_size, inner_batch_size, n_items). These batch dimensions
        are kept in returned payoff.

        If `valuations` is None, the cached valuations are used.
        """
        if valuations is None:
            valuations = self._cached_valuations

        welfare = self.get_welfare(payments, valuations)

        # No try/except here: a shape mismatch must surface as the original
        # error instead of being swallowed and resurfacing as an unrelated
        # UnboundLocalError on `payoff`.
        payoff = winning_probabilities - welfare.unsqueeze(-1)

        # NOTE(review): squeeze() drops EVERY axis of size 1, including a
        # batch axis of size 1 - confirm this is intended.
        return payoff.squeeze()

    def get_welfare(self, payments, valuations=None):
        """For a batch of payments return the valuation-weighted payments,
        summed over the item axis.

        If valuations are not specified, the cached valuations are used.

        Can handle multiple batch dimensions, e.g. for valuations a shape of
        (..., batch_size, n_items). These batch dimensions are kept in returned
        welfare.
        """
        if valuations is None:
            valuations = self._cached_valuations

        # `payments` gets a trailing item axis so it broadcasts against the
        # valuations; the item axis is the last axis of the product.
        welfare = (valuations * payments.unsqueeze(-1)).sum(dim=-1)

        return welfare
class CrowdsourcingContestant(Bidder):
    """Bidder variant whose allocation is an index into a vector of
    `crowdsourcing_values` (see `get_utility`)."""

    def __init__(self, strategy: Strategy,
                 player_position: int,
                 batch_size: int,
                 enable_action_caching: bool = False,
                 crowdsourcing_values: torch.Tensor = None,
                 value_contest: bool = True):
        """
        Args:
            strategy: strategy used to compute actions.
            player_position: position of this player.
            batch_size: number of individual games played in parallel.
            enable_action_caching: see `Bidder`.
            crowdsourcing_values: values indexed by the allocation; its first
                dimension determines the number of classes. Required.
            value_contest: selects how ability enters the utility, see
                `get_utility`.

        Raises:
            TypeError: if `crowdsourcing_values` has no `shape` attribute
                (e.g. because it was omitted).
        """
        super().__init__(strategy, player_position, batch_size, enable_action_caching=enable_action_caching)

        # The values are dereferenced via `.shape` right below, so fail with
        # a clear message instead of an AttributeError on a bool/None.
        if not hasattr(crowdsourcing_values, "shape"):
            raise TypeError(
                "crowdsourcing_values must be a tensor, got {}".format(type(crowdsourcing_values)))

        self.crowdsourcing_values = crowdsourcing_values
        self.num_classes = self.crowdsourcing_values.shape[0]
        self.value_contest = value_contest

    def get_utility(self, allocations, payments, ability=None):
        """
        For a batch of abilities, allocations, and payments of the contestant,
        return their utility.

        Can handle multiple batch dimensions, e.g. for allocations a shape of
        (outer_batch_size, inner_batch_size, n_items). These batch dimensions
        are kept in returned payoff.

        If `ability` is None, the cached valuations are used.
        """
        if ability is None:
            ability = self._cached_valuations

        # Translate the allocated class index into its value: one-hot encode
        # the index and take the inner product with `crowdsourcing_values`.
        allocations = torch.nn.functional.one_hot(allocations.long(), self.num_classes)
        allocations = (allocations * self.crowdsourcing_values).sum(-1)

        if self.value_contest:
            # Ability scales the value obtained; the payment is the disutility.
            allocations = allocations * ability
            disutil = payments.unsqueeze(-1)
        else:
            # Ability scales the disutility of the payment instead.
            disutil = ability * payments.unsqueeze(-1)

        payoff = allocations - disutil

        return payoff.squeeze(-1)