"""Tests for the fictitious-play strategies on matrix games (bnelearn.tests.test_matrix_FP)."""


import math
import pytest

import torch

from bnelearn.bidder import MatrixGamePlayer
from bnelearn.environment import MatrixGameEnvironment
from bnelearn.mechanism import (BattleOfTheSexes, MatchingPennies,
                                PrisonersDilemma)
from bnelearn.strategy import (FictitiousPlayMixedStrategy,
                               FictitiousPlaySmoothStrategy,
                               FictitiousPlayStrategy, MatrixGameStrategy)

# Cuda
# Query CUDA availability once; the torch device string is derived from it.
cuda = torch.cuda.is_available()
device = 'cuda' if cuda else 'cpu'

## Environment settings
# Dummies here
# batch_size is handed to the players and the environment in init_setup.
batch_size = 1
# NOTE(review): input_length is not referenced in the visible part of this file.
input_length = 1
# Params
# Default number of training epochs used by the tests.
epochs = 500

# Wrapper transforming a strategy to bidder, used by the optimizer
# this is a dummy, valuation doesn't matter
def strat_to_player(strategy, batch_size, player_position=None):
    """Wrap ``strategy`` in a ``MatrixGamePlayer``.

    All three arguments are forwarded unchanged to the ``MatrixGamePlayer``
    constructor.  ``init_setup`` also hands this function to
    ``MatrixGameEnvironment`` as its ``strategy_to_player_closure``.

    Args:
        strategy: Strategy object the player should use.
        batch_size: Forwarded as the ``batch_size`` keyword argument.
        player_position: Forwarded as the ``player_position`` keyword
            argument; defaults to ``None``.

    Returns:
        The newly created ``MatrixGamePlayer``.
    """
    player = MatrixGamePlayer(
        strategy,
        batch_size=batch_size,
        player_position=player_position,
    )
    return player
def init_setup(game, strategy, initial_beliefs):
    """Create the strategies, players and environment for a matrix game.

    Args:
        game: Matrix game instance.  Only ``game.n_players`` is read here;
            the object itself is passed on to the strategy constructor and
            to the environment.
        strategy: Strategy class, instantiated with the keyword arguments
            ``game`` and ``initial_beliefs``.
        initial_beliefs: Forwarded unchanged to the strategy constructor.

    Returns:
        Tuple ``(strats, players, env)``: one strategy entry per player, the
        players built from them via ``strat_to_player`` and the
        ``MatrixGameEnvironment`` holding those players.
    """
    n_players = game.n_players

    # init strategies
    per_player_instance = (
        strategy is FictitiousPlayStrategy
        or strategy is FictitiousPlaySmoothStrategy
    )
    if per_player_instance:
        # Each player gets its own, independently constructed strategy.
        strats = [
            strategy(game=game, initial_beliefs=initial_beliefs)
            for _ in range(n_players)
        ]
    else:
        # Any other strategy class is constructed once and that single
        # object is shared by all players.
        shared_strategy = strategy(game=game, initial_beliefs=initial_beliefs)
        strats = [shared_strategy] * n_players

    # init players
    players = [
        strat_to_player(strat, batch_size=batch_size, player_position=position)
        for position, strat in enumerate(strats)
    ]

    # init environment
    env = MatrixGameEnvironment(
        game=game,
        agents=players,
        n_players=n_players,
        batch_size=batch_size,
        strategy_to_player_closure=strat_to_player,
    )

    return strats, players, env
def train(epochs, players, strats, tau_update=1, tau=0.99, tau_minimum=0.0001):
    """Play ``epochs`` rounds and update every strategy after each round.

    In each epoch every player is asked for an action.  The resulting list is
    passed to ``update_observations`` of each strategy, followed by a call to
    ``update_beliefs``.  Strategies that are instances of
    ``FictitiousPlaySmoothStrategy`` or ``FictitiousPlayMixedStrategy``
    additionally get ``update_tau(tau)`` called in every epoch that is a
    positive multiple of ``tau_update``, as long as their current ``tau`` is
    at least ``tau_minimum``.

    Args:
        epochs: Number of rounds to play.
        players: Objects exposing ``get_action()``.
        strats: Strategies to update.  A strategy object that appears several
            times in this list is updated once per occurrence.
        tau_update: Interval, in epochs, between ``update_tau`` calls.
        tau: Value handed to ``update_tau``.
        tau_minimum: ``update_tau`` is no longer called once ``strategy.tau``
            is below this value.

    Returns:
        The tuple ``(strats, players)`` -- the same objects that were passed in.
    """
    for epoch in range(epochs):
        # One action per player, in player order.
        actions = [player.get_action() for player in players]

        for strategy in strats:
            strategy.update_observations(actions)
            strategy.update_beliefs()

            # Only smooth/mixed strategies are annealed; epoch 0 is never an
            # update step.  The class names are looked up here (not hoisted)
            # so they are only needed when there is a strategy to check.
            if (isinstance(strategy, (FictitiousPlaySmoothStrategy,
                                      FictitiousPlayMixedStrategy))
                    and epoch > 0
                    and epoch % tau_update == 0
                    and strategy.tau >= tau_minimum):
                strategy.update_tau(tau)

    return strats, players
############################################# Fictitious Play #################################################
# TODO: Is `env` still needed at all?
def test_FictitiousPlayStrategy_PD():
    """Fictitious play on the Prisoner's Dilemma.

    After training, every player's action must be within 0.1 of 1.
    """
    strats, players, _ = init_setup(PrisonersDilemma(), FictitiousPlayStrategy, None)
    strats, players = train(epochs, players, strats)

    # Testing convergence
    for player in players:
        action = player.get_action()
        assert math.isclose(action, 1, abs_tol=0.1)
def test_FictitiousPlayStrategy_MP():
    """Fictitious play on Matching Pennies.

    After training, every entry of ``strats[i].probs[i]`` must be within
    0.1 of 0.5 for each strategy index ``i``.
    """
    strats, players, _ = init_setup(MatchingPennies(), FictitiousPlayStrategy, None)
    strats, players = train(epochs, players, strats)

    # Testing convergence
    for position, strategy in enumerate(strats):
        own_probs = strategy.probs[position]
        for prob in own_probs:
            assert math.isclose(prob, 0.5, abs_tol=0.1)
def test_FictitiousPlayStrategy_BoS():
    """Fictitious play on Battle of the Sexes.

    After training with default beliefs, player 0's action must be within
    0.1 of either 0 or 1.

    Initial beliefs tried while testing (kept for reference; the test itself
    passes ``None`` as initial beliefs)::

        initial_beliefs = [None] * 2
        #initial_beliefs[0] = torch.tensor([[0.8308,0.5793],[0.4064,0.4113]], device=device) <- converges
        #initial_beliefs[1] = torch.tensor([[0.2753,0.4043],[0.1596,0.6916]], device=device) <- converges
        #initial_beliefs[0] = torch.tensor([[0.5892,0.4108],[0.4970,0.5030]], device=device) #<- converges
        #initial_beliefs[1] = torch.tensor([[0.4051,0.5949],[0.1875,0.8125]], device=device) #<- converges
        #initial_beliefs[0] = torch.tensor([[0.59,0.41],[0.49,0.51]], device=device) #<- converges
        #initial_beliefs[1] = torch.tensor([[0.41,0.59],[0.19,0.81]], device=device) #<- converges
        #initial_beliefs[0] = torch.tensor([[0.59,0.41],[0.49,0.51]], device=device) #<- converges
        #initial_beliefs[1] = torch.tensor([[0.59,0.41],[0.49,0.51]], device=device) #<- converges
        #initial_beliefs[0] = torch.tensor([[0.59,0.41],[0.41,0.59]], device=device) #<- converges
        #initial_beliefs[1] = torch.tensor([[0.59,0.41],[0.41,0.59]], device=device) #<- converges
        #initial_beliefs[0] = torch.tensor([[59.5,40.5],[40.5,59.5]], device=device) #<- converges
        #initial_beliefs[1] = torch.tensor([[59.5,40.5],[40.5,59.5]], device=device) #<- converges
        #initial_beliefs[0] = torch.tensor([[59,41],[49,51]], device=device) #<- doesn't converge
        #initial_beliefs[1] = torch.tensor([[41,59],[19,81]], device=device) #<- doesn't converge
        #initial_beliefs[0] = torch.tensor([[0.6,0.4],[0.5,0.5]], device=device) #<- doesn't converge
        #initial_beliefs[1] = torch.tensor([[0.4,0.6],[0.1875,0.8125]], device=device) #<- doesn't converge
        #initial_beliefs[0] = torch.tensor([[0.6,0.4],[0.5,0.5]], device=device) #<- doesn't converge
        #initial_beliefs[1] = torch.tensor([[0.4,0.6],[0.2,0.8]], device=device) #<- doesn't converge
        #initial_beliefs = torch.tensor([[60,41],[41,60]], device=device) #<- doesn't converge
        #initial_beliefs[1] = torch.tensor([[60,41],[41,60]], device=device) #<- doesn't converge

    -> It converges if the init is very close to MNE play for at least one
       player but not exactly!
    -> My hypothesis: it has to be close to the cycle. If it is exact,
       it is indifferent and takes a random direction, diverging away.
    -> $$ The question now is whether we should/have to track historical
       actions with integers!?
    -> No. In Fudenberg (1999) - Learning and Equilibrium, p. 389
       they init FP with (1,sqrt(2)), so obviously use float as well.
    """
    strats, players, _ = init_setup(BattleOfTheSexes(), FictitiousPlayStrategy, None)
    strats, players = train(epochs, players, strats)

    # Testing convergence
    # Query the action once so that both comparisons and the failure message
    # refer to the very same value.
    action = players[0].get_action()
    assert (math.isclose(action, 0, abs_tol=0.1)
            or math.isclose(action, 1, abs_tol=0.1)), \
        "Player 0's action: {} is neither 0 nor 1".format(action)
######################################################################################################################
############################################# Smooth Fictitious Play #################################################
def test_FictitiousPlaySmoothStrategy_PD():
    """Smooth fictitious play on the Prisoner's Dilemma.

    After training, every player's action must be within 0.1 of 1.
    """
    strats, players, _ = init_setup(PrisonersDilemma(), FictitiousPlaySmoothStrategy, None)
    strats, players = train(epochs, players, strats)

    # Testing convergence
    for player in players:
        action = player.get_action()
        assert math.isclose(action, 1, abs_tol=0.1)
def test_FictitiousPlaySmoothStrategy_MP():
    """Smooth fictitious play on Matching Pennies.

    After training, every entry of ``strats[i].probs[i]`` must be within
    0.1 of 0.5 for each strategy index ``i``.
    """
    strats, players, _ = init_setup(MatchingPennies(), FictitiousPlaySmoothStrategy, None)
    strats, players = train(epochs, players, strats)

    # Testing convergence
    for position, strategy in enumerate(strats):
        own_probs = strategy.probs[position]
        for prob in own_probs:
            assert math.isclose(prob, 0.5, abs_tol=0.1)
def test_FictitiousPlaySmoothStrategy_BoS():
    """Smooth fictitious play on Battle of the Sexes.

    Part 1 (default beliefs): after training, both players' actions must be
    within 0.1 of each other, and player 0's action must be within 0.1 of
    either 0 or 1.

    Part 2 (beliefs initialised to ``[[60, 40], [40, 60]]``) is currently
    skipped via ``pytest.skip``; see the TODO in the body.
    """
    # Converge to PN
    strats, players, _ = init_setup(BattleOfTheSexes(), FictitiousPlaySmoothStrategy, None)
    strats, players = train(epochs, players, strats)

    # Testing convergence
    # Query each player once so that the comparisons and the failure
    # messages all refer to the same values.
    action_0 = players[0].get_action()
    action_1 = players[1].get_action()
    assert math.isclose(action_0, action_1, abs_tol=0.1), \
        "Player 0's action: {} is different than player 1's action: {}".format(
            action_0, action_1)
    assert (math.isclose(action_0, 0, abs_tol=0.1)
            or math.isclose(action_0, 1, abs_tol=0.1)), \
        "Player 0's action: {} is neither 0 nor 1".format(action_0)

    # Can't hold converge to MNE!
    #TODO: fix this
    # The skip is raised before the second setup so that the 5000-epoch
    # training run below is not executed just to be thrown away.
    pytest.skip("something is wrong with this test -- it 'passes' when the difference is LARGE")

    # Everything below is unreachable until the skip above is removed.
    # Params
    tau_update = 10
    tau = 0.99
    tau_minimum = 0.5
    initial_beliefs = torch.tensor([[60, 40], [40, 60]], dtype=torch.float, device=device)

    strats, players, _ = init_setup(BattleOfTheSexes(), FictitiousPlaySmoothStrategy, initial_beliefs)
    strats, players = train(5000, players, strats,
                            tau_update=tau_update, tau=tau, tau_minimum=tau_minimum)

    # Testing convergence
    assert abs(strats[0].probs[0][0] - 0.6) > 0.1, \
        "Strategy 0's probs: {} is not more than 0.1 different than equilibrium 0.6".format(strats[0].probs[0])
    assert abs(strats[1].probs[1][0] - 0.4) > 0.1, \
        "Strategy 1's probs: {} is not more than 0.1 different than equilibrium 0.4".format(strats[1].probs[1])
#####################################################################################################################
############################################# Mixed Fictitious Play #################################################
def test_FictitiousPlayMixedStrategy_PD():
    """Mixed fictitious play on the Prisoner's Dilemma.

    After training, entry 1 of every player's action must be within 0.1 of 1.
    """
    strats, players, _ = init_setup(PrisonersDilemma(), FictitiousPlayMixedStrategy, None)
    strats, players = train(epochs, players, strats)

    # Testing convergence
    for player in players:
        second_entry = player.get_action()[1]
        assert math.isclose(second_entry, 1, abs_tol=0.1)
def test_FictitiousPlayMixedStrategy_MP():
    """Mixed fictitious play on Matching Pennies.

    After training, every entry of ``strats[i].probs[i]`` must be within
    0.1 of 0.5 for each strategy index ``i``.
    """
    strats, players, _ = init_setup(MatchingPennies(), FictitiousPlayMixedStrategy, None)
    strats, players = train(epochs, players, strats)

    # Testing convergence
    for position, strategy in enumerate(strats):
        own_probs = strategy.probs[position]
        for prob in own_probs:
            assert math.isclose(prob, 0.5, abs_tol=0.1)
def test_FictitiousPlayMixedStrategy_BoS():
    """Mixed fictitious play on Battle of the Sexes.

    Part 1 (default beliefs): after training, entries 0 and 1 of both
    players' actions must agree within 0.1, and entry 0 of player 0's action
    must be within 0.1 of either 0 or 1.

    Part 2 (beliefs initialised to ``[[60, 40], [40, 60]]``, 5000 epochs with
    ``tau_update=10``, ``tau=0.99``, ``tau_minimum=0.5``): ``strats[0].probs[0][0]``
    must be within 0.1 of 0.6 and ``strats[1].probs[1][0]`` within 0.1 of 0.4.
    """
    # Converge to PN
    strats, players, _ = init_setup(BattleOfTheSexes(), FictitiousPlayMixedStrategy, None)
    strats, players = train(epochs, players, strats)

    # Testing convergence
    # Query each player once so that the comparisons and the failure
    # messages all refer to the same values.
    action_0 = players[0].get_action()
    action_1 = players[1].get_action()
    assert math.isclose(action_0[0], action_1[0], abs_tol=0.1), \
        "Player 0's action: {} is different than player 1's action: {}".format(
            action_0[0], action_1[0])
    assert math.isclose(action_0[1], action_1[1], abs_tol=0.1), \
        "Player 0's action: {} is different than player 1's action: {}".format(
            action_0[1], action_1[1])
    assert (math.isclose(action_0[0], 0, abs_tol=0.1)
            or math.isclose(action_0[0], 1, abs_tol=0.1)), \
        "Player 0's action: {} is neither 0 nor 1".format(action_0[0])

    # Can hold converge to MNE!
    # Params
    tau_update = 10
    tau = 0.99
    tau_minimum = 0.5
    initial_beliefs = torch.tensor([[60, 40], [40, 60]], dtype=torch.float, device=device)

    strats, players, _ = init_setup(BattleOfTheSexes(), FictitiousPlayMixedStrategy, initial_beliefs)
    strats, players = train(5000, players, strats,
                            tau_update=tau_update, tau=tau, tau_minimum=tau_minimum)

    # Testing convergence
    assert math.isclose(strats[0].probs[0][0], 0.6, abs_tol=0.1), \
        "Strategy 0's probs: {} is different than equilibrium 0.6".format(strats[0].probs[0])
    assert math.isclose(strats[1].probs[1][0], 0.4, abs_tol=0.1), \
        "Strategy 1's probs: {} is different than equilibrium 0.4".format(strats[1].probs[1])
#####################################################################################################################