-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgames.py
274 lines (238 loc) · 8.81 KB
/
games.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
import numpy as np
# Game 1: two players with three actions each, sharing team rewards.
# Under u1 and u2 this game has two PSNE: [0, 0], [2, 2]
# Verified with Gambit.
monfg1 = [
    np.array([
        [[4, 0], [3, 1], [2, 2]],
        [[3, 1], [2, 2], [1, 3]],
        [[2, 2], [1, 3], [0, 4]],
    ])
    for _ in range(2)  # Team rewards: each player gets its own copy of the same payoffs.
]
# Game 2: two players with two actions each, sharing team rewards.
# Under u1 and u2 this game has two PSNE: [0, 0], [1, 1]
# Verified with Gambit.
monfg2 = [
    np.array([
        [[4, 0], [2, 2]],
        [[2, 2], [0, 4]],
    ])
    for _ in range(2)  # Team rewards: each player gets its own copy of the same payoffs.
]
# Game 3: two players with two actions each, sharing team rewards.
# Under u1 and u2 this game has a single PSNE: [0, 0]
# Verified with Gambit.
monfg3 = [
    np.array([
        [[4, 0], [3, 1]],
        [[3, 1], [2, 2]],
    ])
    for _ in range(2)  # Team rewards: each player gets its own copy of the same payoffs.
]
# Game 4: two players with two actions each, sharing team rewards.
# Under u1 and u2 this game has two PSNE: [0, 0], [1, 1]
# Verified with Gambit.
# IBR cycles on this game with simultaneous updates, but not with alternating updates.
# Fictitious play does not cycle on this game.
monfg4 = [
    np.array([
        [[4, 1], [1, 2]],
        [[3, 1], [3, 2]],
    ])
    for _ in range(2)  # Team rewards: each player gets its own copy of the same payoffs.
]
# Game 5: two players with three actions each, sharing team rewards.
# Under u1 and u2 this game has three PSNE: [0, 0], [1, 1], [2, 2]
# Verified with Gambit.
# IBR cycles on this game with simultaneous updates, but not with alternating updates.
monfg5 = [
    np.array([
        [[4, 1], [1, 2], [2, 1]],
        [[3, 1], [3, 2], [1, 2]],
        [[1, 2], [2, 1], [1, 3]],
    ])
    for _ in range(2)  # Team rewards: each player gets its own copy of the same payoffs.
]
# Game 6: two players with three actions each, with individual rewards.
# Under u1 and u2 this game has two PSNE: [0, 0], [2, 2]
# Verified with Gambit.
monfg6 = [
    np.array(player_payoffs)
    for player_payoffs in (
        [  # Payoffs of player 1.
            [[4, 1], [1, 2], [2, 1]],
            [[3, 1], [3, 2], [1, 2]],
            [[1, 2], [2, 1], [1, 3]],
        ],
        [  # Payoffs of player 2.
            [[4, 0], [3, 1], [2, 2]],
            [[3, 1], [2, 2], [1, 3]],
            [[2, 2], [1, 3], [0, 4]],
        ],
    )
]
# Game 7: two players with three actions each, with individual rewards.
# Under u1 and u2 this game has no PSNE.
# Verified with Gambit.
monfg7 = [
    np.array(player_payoffs)
    for player_payoffs in (
        [  # Payoffs of player 1.
            [[2, 3], [3, 2], [1, 1]],
            [[2, 5], [0, 2], [5, 2]],
            [[1, 3], [4, 0], [1, 3]],
        ],
        [  # Payoffs of player 2.
            [[0, 3], [1, 2], [2, 1]],
            [[2, 2], [3, 2], [1, 2]],
            [[3, 1], [0, 3], [1, 0]],
        ],
    )
]
# Game 8: three players with two actions each, with individual rewards.
# Under u1, u2 and u3 this game has two PSNE: [0, 1, 1], [1, 0, 1]
# Verified by hand.
monfg8 = [
    np.array(player_payoffs)
    for player_payoffs in (
        [  # Payoffs of player 1.
            [[[1, 0], [2, 1]],
             [[3, 0], [1, 2]]],
            [[[0, 2], [2, 2]],
             [[3, 1], [2, 0]]],
        ],
        [  # Payoffs of player 2.
            [[[2, 0], [0, 2]],
             [[1, 1], [1, 2]]],
            [[[0, 0], [1, 2]],
             [[2, 1], [0, 0]]],
        ],
        [  # Payoffs of player 3.
            [[[1, 2], [2, 1]],
             [[0, 1], [2, 2]]],
            [[[1, 1], [0, 3]],
             [[1, 1], [1, 2]]],
        ],
    )
]
# Game 9: three players with individual rewards; player 1 has 3 actions, player 2 has 2 and player 3 has 3.
# Under u1, u2 and u3 this game has three PSNE: [0, 1, 1], [1, 0, 2], [1, 1, 0]
# Verified by hand.
monfg9 = [
    np.array(player_payoffs)
    for player_payoffs in (
        [  # Payoffs of player 1.
            [[[1, 0], [2, 1], [1, 2]],
             [[3, 0], [1, 2], [2, 2]]],
            [[[0, 2], [2, 2], [3, 0]],
             [[3, 1], [2, 0], [0, 1]]],
            [[[1, 1], [0, 0], [2, 1]],
             [[1, 2], [2, 0], [3, 0]]],
        ],
        [  # Payoffs of player 2.
            [[[0, 2], [0, 1], [1, 1]],
             [[1, 3], [2, 2], [2, 2]]],
            [[[0, 2], [2, 0], [3, 0]],
             [[3, 1], [1, 0], [2, 1]]],
            [[[2, 2], [2, 1], [2, 0]],
             [[0, 1], [1, 3], [1, 1]]],
        ],
        [  # Payoffs of player 3.
            [[[1, 3], [1, 1], [2, 2]],
             [[2, 1], [2, 3], [2, 0]]],
            [[[0, 2], [1, 1], [3, 1]],
             [[3, 1], [2, 1], [2, 1]]],
            [[[0, 1], [1, 0], [0, 0]],
             [[1, 1], [2, 1], [1, 1]]],
        ],
    )
]
# Game 10: two players with two actions each, with individual rewards.
# Player 2 receives the reward vector (2, 2) for every joint action.
monfg10 = [
    np.array([
        [[2, 2], [0, 0]],
        [[2, 2], [1, 1]],
    ]),
    np.full((2, 2, 2), 2),  # Constant payoffs for player 2.
]
def u1(vector):
    """
    This function computes a utility as the sum of the squares of the first two rewards.
    :param vector: The reward vector. Only the entries at index 0 and 1 are used.
    :return: The utility vector[0]^2 + vector[1]^2.
    """
    first, second = vector[0], vector[1]
    return first ** 2 + second ** 2
def u2(vector):
    """
    This function computes a utility from the squares of the first two rewards plus their product.
    :param vector: The reward vector. Only the entries at index 0 and 1 are used.
    :return: The utility vector[0]^2 + vector[0] * vector[1] + vector[1]^2.
    """
    first, second = vector[0], vector[1]
    return first ** 2 + first * second + second ** 2
def u3(vector):
    """
    This function computes a utility that is quadratic in the first reward and linear in the second.
    :param vector: The reward vector. Only the entries at index 0 and 1 are used.
    :return: The utility vector[0]^2 + vector[1].
    """
    first, second = vector[0], vector[1]
    return first ** 2 + second
def u4(vector):
    """
    This function computes a utility that is linear in the first reward and quadratic in the second.
    :param vector: The reward vector. Only the entries at index 0 and 1 are used.
    :return: The utility vector[0] + vector[1]^2.
    """
    first, second = vector[0], vector[1]
    return first + second ** 2
def get_monfg(game):
    """
    This function looks up one of the predefined multi-objective normal-form games by its name.
    :param game: The name of the requested game, 'game1' up to and including 'game10'.
    :return: A list of payoff matrices.
    :raises Exception: When no game with the provided name exists.
    """
    known_games = {
        'game1': monfg1,
        'game2': monfg2,
        'game3': monfg3,
        'game4': monfg4,
        'game5': monfg5,
        'game6': monfg6,
        'game7': monfg7,
        'game8': monfg8,
        'game9': monfg9,
        'game10': monfg10,
    }
    try:
        return known_games[game]
    except (KeyError, TypeError):  # Unknown name, or a value that cannot be a name at all.
        raise Exception("The provided game does not exist.") from None
def get_u(u_str):
    """
    This function looks up one of the predefined utility functions by its name.
    :param u_str: The name of the requested utility function: 'u1', 'u2', 'u3' or 'u4'.
    :return: A utility function.
    :raises Exception: When no utility function with the provided name exists.
    """
    known_utilities = {
        'u1': u1,
        'u2': u2,
        'u3': u3,
        'u4': u4,
    }
    try:
        return known_utilities[u_str]
    except (KeyError, TypeError):  # Unknown name, or a value that cannot be a name at all.
        raise Exception("The provided utility function does not exist.") from None
def generate_random_monfg(player_actions=(2, 2), num_objectives=2, reward_min_bound=0, reward_max_bound=5):
    """
    This function will generate a random MONFG for testing purposes.
    :param player_actions: A sequence (tuple or list) with at each index the number of actions for that player.
    :param num_objectives: The number of objectives in the game.
    :param reward_min_bound: The minimum reward on an objective (inclusive).
    :param reward_max_bound: The upper bound for the reward on an objective. This bound is exclusive because it is
        passed as `high` to np.random.randint, so the largest reward that can be drawn is reward_max_bound - 1.
    :return: A list of payoff matrices representing the MONFG, one per player, each with shape
        player_actions + (num_objectives,).
    """
    # Normalise to a tuple first: concatenating a list with a tuple raises a TypeError.
    player_actions = tuple(player_actions)
    payoffs_shape = player_actions + (num_objectives,)  # Define the shape of the payoff matrices.
    # Draw an independent payoff matrix for every player.
    return [
        np.random.randint(low=reward_min_bound, high=reward_max_bound, size=payoffs_shape)
        for _ in player_actions
    ]
def generate_identity_game(player_actions=(2, 2)):
    """
    This function generates an identity game. In such a game, the payoff vector of a joint strategy is the
    concatenation of the one-hot encodings of each player's action. Every player receives the same payoffs.
    :param player_actions: A sequence (tuple or list) with at each index the number of actions for that player.
    :return: A list of payoff matrices, one per player, each with shape player_actions + (sum(player_actions),).
        The matrices are separate arrays with equal contents.
    """
    # Normalise to a tuple first: concatenating a list with a tuple raises a TypeError.
    player_actions = tuple(player_actions)
    joint_strat_length = sum(player_actions)  # Description length of a joint strategy.
    payoff_matrix = np.zeros(player_actions + (joint_strat_length,))

    for joint_strat in np.ndindex(*player_actions):  # Loop over joint strategies.
        offset = 0  # Start of the current player's one-hot segment in the payoff vector.
        for num_actions, action in zip(player_actions, joint_strat):
            payoff_matrix[joint_strat + (offset + action,)] = 1
            offset += num_actions

    # The first player gets the matrix itself, every other player gets an independent copy.
    return [payoff_matrix] + [np.copy(payoff_matrix) for _ in player_actions[1:]]