import numpy as np
import math
import matplotlib.pyplot as plt
from time import time
import scipy.stats as stats
from scipy.stats import beta
from functools import reduce
from operator import matmul
Utility functions (please skip to the next section)
def softmax(r):
    # Numerically stable softmax: subtracting max(r) leaves the result unchanged
    # (softmax is shift-invariant) but avoids overflow in the exponential.
    exp_r = np.exp(r - np.max(r))
    return exp_r / np.sum(exp_r)
def gradient_ascent_dynamics(G, n, eta=1.0, max_iter=50_000, T_max=10):
    # (Softmax) gradient ascent for a 3-player game with n actions.
    # Takes the reward tensor G, the number of actions n, a learning rate eta,
    # a max iteration count, and a wall-clock budget T_max (in seconds).
    # Returns the number of iterations, the average payoff, and the final payoff for player 0.
    t_0 = time()
    # Initialise each player's "logit" vector (mapped to a mixed strategy via softmax).
    r0 = np.zeros(n)
    r1 = np.zeros(n)
    r2 = np.zeros(n)

    total_payoff = 0.0

    for it in range(max_iter):
        # Convert logits to mixed strategies.
        p0 = softmax(r0)
        p1 = softmax(r1)
        p2 = softmax(r2)

        # Compute the expected payoff of each pure action for each player.
        # For player 0: Q0[i] = sum_{j,k} p1[j] p2[k] G[0][i, j, k]
        Q0 = np.array([np.dot(p1, np.dot(G[0][i, :n, :n], p2)) for i in range(n)])
        # For player 1: Q1[i] = sum_{j,k} p0[j] p2[k] G[1][j, i, k]
        Q1 = np.array([np.dot(p0, np.dot(G[1][:n, i, :n], p2)) for i in range(n)])
        # For player 2: Q2[i] = sum_{j,k} p0[j] p1[k] G[2][j, k, i]
        Q2 = np.array([np.dot(p0, np.dot(G[2][:n, :n, i], p1)) for i in range(n)])

        # Compute the overall expected payoff for each player.
        f0 = np.dot(p0, Q0)
        f1 = np.dot(p1, Q1)
        f2 = np.dot(p2, Q2)

        # Accumulate payoff for averaging.
        total_payoff += f0

        # Every 100 iterations, check whether the current strategy profile is an
        # epsilon-NE (multiplicatively, within 1% of the best response) or the
        # time budget is exhausted.
        if it % 100 == 0:
            best0 = np.max(Q0)
            best1 = np.max(Q1)
            best2 = np.max(Q2)
            if ((f0 >= 0.99 * best0) and (f1 >= 0.99 * best1) and (f2 >= 0.99 * best2)) or (time() - t_0 > T_max):
                break

        grad_r0 = Q0 - f0
        grad_r1 = Q1 - f1
        grad_r2 = Q2 - f2

        # Update the logits with a simple gradient ascent step.
        r0 += eta * grad_r0
        r1 += eta * grad_r1
        r2 += eta * grad_r2

    avg_payoff = total_payoff / max(it, 1)  # guard against a break at it == 0
    return it, avg_payoff, f0
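A remark on the update direction: writing p = softmax(r) and f = Σᵢ pᵢ Qᵢ, the exact derivative is ∂f/∂rᵢ = pᵢ (Qᵢ − f), so the step Qᵢ − f used above rescales each coordinate of the true gradient by 1/pᵢ > 0. This preserves the sign of every coordinate (it is still an ascent direction) and matches the natural policy gradient step for the softmax parametrisation.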
# Hyperparameters for gradient ascent. These numbers were selected after some trial and error and gave the best results for this algorithm.
step_size = 1.0
max_iterations = 50_000
These are utility functions used only for plotting.
def clopper_pearson_interval(k, n, alpha=0.005):
    # Exact (Clopper-Pearson) confidence interval for a binomial proportion,
    # given k successes out of n trials, at confidence level 1 - alpha.
    lower = beta.ppf(alpha / 2, k, n - k + 1) if k > 0 else 0.0
    upper = beta.ppf(1 - alpha / 2, k + 1, n - k) if k < n else 1.0
    return lower, upper
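For instance (an illustrative call, not used elsewhere), the default alpha=0.005 yields the 99.5% intervals shaded in the plots below.
# Illustrative call: 99.5% Clopper-Pearson interval for 9 successes in 10 trials.
lo, hi = clopper_pearson_interval(9, 10)
print(f"99.5% CI: [{lo:.3f}, {hi:.3f}]")  # a wide interval, reflecting the small sample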
def plot_single_algorithm(alg_name="SBRD", varName="time", ylabel=None, axis=0, mean_name=None, variance=True, only_if_conv=False, log=False, n_cols=1, number_of_plots=-1, binomial=False, isPositive=False, width=6, legend_loc = "upper left", title=None):
"""
Plot runtimes of a single algorithm (PGD or SBRD) in subplots, comparing across one parameter.
Parameters:
- alg_name: 'PGD' or 'SBRD'
- varName: variable name suffix for accessing data (e.g., "Seconds" accesses PGD_Seconds or SBRD_Seconds)
- axis: 0 or 1
- 0: compare across N for each Lambda (row-wise)
- 1: compare across Lambda for each N (column-wise)
- variance: whether to include standard deviation shading
- only_if_conv: whether to only include data when convergence was achieved (using *_isNash)
- log: whether to plot on a log scale
- n_cols: number of columns of subplots to create
- number_of_plots: number of subplots to show (-1 for all)
- lambda_vals: optional list/array of Lambda values
- A_vals: optional list/array of N values
- binomial: whether the data is binomial (0-1 values)
"""
if ylabel == None:
= varName
ylabel
# If the function input "only_if_conv" is True, only include the cases in which the samples for which algorithm converges
= globals()[f"{alg_name}_{varName}"]
data if only_if_conv:
= globals()[f"{alg_name}_isNash"]
isNash = np.where(isNash, data, np.nan)
masked_data else:
= data
masked_data
"""
Creates the matrix of means and the matrices needed for the Confidence Intervals.
If the data is Binary, the CI is done using the Clopper Pearson interval. If the data is real, we use the Standard Error
"""
= np.nanmean(masked_data, axis=-1)
mean_data if variance:
= np.sum(~np.isnan(masked_data), axis=-1)
n_data if binomial:
= np.nansum(masked_data, axis=-1)
sum_data = np.zeros_like(mean_data)
lower_bounds = np.ones_like(mean_data)
upper_bounds for i in range(mean_data.shape[0]):
for j in range(mean_data.shape[1]):
if n_data[i, j] > 0:
= clopper_pearson_interval(int(sum_data[i, j]), int(n_data[i, j]))
lower, upper = lower
lower_bounds[i, j] = upper
upper_bounds[i, j] else:
= np.sqrt(np.divide(np.nanvar(masked_data, axis=-1), n_data))
SE_data
= mean_data.shape
L, N
"""
Depending on wether we want the plots to be done along the 0 or 1 axis, we get different "get_mean", "get_lower" and "get_upper"
functions, that take as input i and return the right vector (respectively the mean, lower and upper CI points) with that index.
"""
if axis == 0:
= L
num_plots = A_vals
x_vals = lambda_vals
y_vals = lambda i: mean_data[i, :]
get_mean if variance:
if binomial:
= lambda i: lower_bounds[i, :]
get_lower = lambda i: upper_bounds[i, :]
get_upper else:
= lambda i: SE_data[i, :]
get_var = "Correlation λ"
label = "Nr. actions"
xlabel elif axis == 1:
= N
num_plots = lambda_vals
x_vals = A_vals
y_vals = lambda i: mean_data[:, i]
get_mean if variance:
if binomial:
= lambda i: lower_bounds[:, i]
get_lower = lambda i: upper_bounds[:, i]
get_upper else:
= lambda i: SE_data[:, i]
get_var = "Nr. actions"
label = "Correlation λ"
xlabel else:
raise ValueError("Axis must be 0 (over Lambda) or 1 (over N)")
"""
We now create the structure for the plots
"""
if number_of_plots == -1 or number_of_plots >= num_plots:
= list(range(num_plots))
indices else:
= np.linspace(0, num_plots - 1, number_of_plots, dtype=int)
indices
= math.ceil(len(indices) / n_cols)
n_rows = plt.subplots(n_rows, n_cols, figsize=(width * n_cols, 3 * n_rows), sharex=True)
fig, axes = np.array(axes).flatten()
axes
= (0.1, 0.5, 0.8) if alg_name == "PGD" else (0.9, 0.4, 0.2)
color
"""
We finally plot the various graphs
"""
for i, idx in enumerate(indices):
= axes[i]
ax
ax.set_xlabel(xlabel)if log:
'log')
ax.set_yscale(= get_mean(idx)
ydata = np.nanmin(ydata)
ymin = np.nanmax(ydata)
ymax
# Round to nearest powers of 10
= 10 ** np.floor(np.log10(ymin))
lower = 10 ** np.ceil(np.log10(ymax))
upper
ax.set_ylim(lower, upper)=color, label=mean_name if mean_name!= None else varName)
ax.plot(x_vals, get_mean(idx), colorif variance:
if binomial:
=color, alpha=0.2, label='99.5% CI')
ax.fill_between(x_vals, get_lower(idx), get_upper(idx), colorelse:
if isPositive:
= np.clip(get_mean(idx) - 2*get_var(idx), 0, None)
lower = np.clip(get_mean(idx) + 2*get_var(idx), 0, None)
upper =color, alpha=0.2, label='±2 SE')
ax.fill_between(x_vals, lower, upper, color
ax.set_ylabel(ylabel)if title == None:
f"{label} = {round(y_vals[idx], 2)}, samples = {samples}")
ax.set_title(else:
ax.set_title(title)=legend_loc)
ax.legend(locTrue)
ax.grid(
for j in range(len(indices), len(axes)):
fig.delaxes(axes[j])
=[0, 0, 1, 0.96])
plt.tight_layout(rectf"{alg_name}_{varName}_samples={samples} axis={axis}.pdf")
plt.savefig(
plt.show()
def compare_algorithms(PGD_varName="time", SBRD_varName="time", yLabel=None, axis=0, mean_1_name=None, mean_2_name=None, title=None, variance=True, only_if_conv=False, log=False, n_cols=1, number_of_plots=-1, binomial=False, width = 6, isPositive=False, legend_loc="upper left"):
"""
Plot runtimes of PGD and SBRD algorithms in subplots, comparing them across one parameter.
Parameters:
- PGD_varName, SBRD_varName: variable name suffix for accessing data (e.g., "time" accesses PGD_time and SBRD_time)
- axis: 0 or 1
- 0: compare across N for each Lambda (row-wise)
- 1: compare across Lambda for each N (column-wise)
- variance: whether to include standard deviation shading
- only_if_conv: whether to only include data when convergence was achieved (using PGD_isNash, SBRD_isNash)
- log: whether to plot on a log scale
- n_cols: number of columns of subplots to create
- lambda_vals: global list/array of Lambda values
- A_vals: global list/array of N values
- number_of_plots: number of subplots to display (equally spaced). If -1, show all.
- binomial: whether the data is binomial (0-1)
- isPositive: whether to clip the lower bound at 0
"""
if yLabel == None:
= PGD_varName
yLabel
def process_data(alg_name, varName):
= globals()[f"{alg_name}_{varName}"]
data if only_if_conv:
= globals()[f"{alg_name}_S7_isNash"]
isNash = np.where(isNash, data, np.nan)
data = np.nanmean(data, axis=-1)
mean_data = np.sum(~np.isnan(data), axis=-1)
n_data
if not variance:
return mean_data, None, None
if binomial:
= np.nansum(data, axis=-1)
sum_data = np.zeros_like(mean_data)
lower_bounds = np.ones_like(mean_data)
upper_bounds for i in range(mean_data.shape[0]):
for j in range(mean_data.shape[1]):
if n_data[i, j] > 0:
= clopper_pearson_interval(int(sum_data[i, j]), int(n_data[i, j]))
lower, upper = lower
lower_bounds[i, j] = upper
upper_bounds[i, j] return mean_data, lower_bounds, upper_bounds
else:
= np.sqrt(np.nanvar(data, axis=-1) / n_data)
SE = mean_data - 2 * SE
lower_bounds = mean_data + 2 * SE
upper_bounds return mean_data, lower_bounds, upper_bounds
= process_data("PGD", PGD_varName)
PGD_mean, PGD_lower, PGD_upper = process_data("SBRD", SBRD_varName)
SBRD_mean, SBRD_lower, SBRD_upper
= PGD_mean.shape
L, N if axis == 0:
= L
num_plots = A_vals
x_vals = lambda_vals
y_vals = lambda data, i: data[i, :]
get_slice = "Correlation λ"
label = "Nr. actions"
xlabel elif axis == 1:
= N
num_plots = lambda_vals
x_vals = A_vals
y_vals = lambda data, i: data[:, i]
get_slice = "Nr. actions"
label = "Correlation λ"
xlabel else:
raise ValueError("Axis must be 0 (over Lambda) or 1 (over N)")
= range(num_plots) if number_of_plots == -1 or number_of_plots >= num_plots else np.linspace(0, num_plots - 1, number_of_plots, dtype=int)
indices = math.ceil(len(indices) / n_cols)
n_rows = plt.subplots(n_rows, n_cols, figsize=(width * n_cols, 3 * n_rows), sharex=True)
fig, axes = np.array(axes).flatten()
axes
= {"PGD": (0.1, 0.5, 0.8), "SBRD": (0.9, 0.4, 0.2)}
colors
for i, idx in enumerate(indices):
= axes[i]
ax
ax.set_xlabel(xlabel)if log:
'log')
ax.set_yscale(
for name, mean, lower, upper, color in [
"PGD", PGD_mean, PGD_lower, PGD_upper, colors["PGD"]),
("SBRD", SBRD_mean, SBRD_lower, SBRD_upper, colors["SBRD"])
(
]:= get_slice(mean, idx)
y if variance:
if binomial:
if name == "PGD":
= mean_1_name
curr_label if name == "SBRD":
= mean_2_name
curr_label else:
if name == "PGD":
= mean_1_name
curr_label if name == "SBRD":
= mean_2_name
curr_label =curr_label, color=color)
ax.plot(x_vals, y, labelif variance and lower is not None and upper is not None:
= get_slice(lower, idx)
y_lower = get_slice(upper, idx)
y_upper if isPositive:
= np.clip(y_lower, 0, None)
y_lower = np.clip(y_upper, 0, None)
y_upper #label_ci = "99.5% CI" if binomial else "±2SE"
=color, alpha=0.2)#, label=f"{label_ci}")
ax.fill_between(x_vals, y_lower, y_upper, color
ax.set_ylabel(yLabel)if title == None:
f"{label} = {round(y_vals[idx], 2)}, samples = {samples}")
ax.set_title(else:
ax.set_title(title)= legend_loc)
ax.legend(locTrue)
ax.grid(
for j in range(len(indices), len(axes)):
fig.delaxes(axes[j])
=[0, 0, 1, 0.96])
plt.tight_layout(rectf"compare_{PGD_varName}_axis={axis}.pdf")
plt.savefig( plt.show()
Dynamics definitions
def generate_reward_matrix(P, A, lam):
    """
    Generate the reward tensor for a P-player, A-action game.

    Parameters:
    - P (int): Number of players.
    - A (int): Number of actions per player.
    - lam (float): Correlation between players' payoffs.

    Returns:
    - np.ndarray: Reward tensor G of shape (P, A, ..., A). Each entry G[i, a_1, ..., a_P]
      is the payoff to player i when the players play actions a_1, ..., a_P respectively.
    """
    # Correlation matrix across players: 1 on the diagonal, lam off the diagonal.
    diagonal = np.diag(np.ones(P))
    ones_matrix = np.ones((P, P))
    corr_matrix = (ones_matrix - np.diag(np.diag(ones_matrix))) * lam + diagonal

    # Cholesky decomposition for generating correlated normal variables.
    L = np.linalg.cholesky(corr_matrix)

    # Generate correlated payoffs for all joint action profiles:
    # each column of raw_samples is one joint profile, each row one player.
    raw_samples = np.random.randn(P, A**P)
    correlated = L @ raw_samples

    # Reshape to (P, A, ..., A): one payoff tensor per player.
    shape = tuple(P if i == 0 else A for i in range(P + 1))
    return correlated.reshape(shape)
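A quick check of the construction (an illustrative snippet; G_test is not used elsewhere): the tensor has one A×...×A payoff block per player, and distinct players' payoffs should be correlated at roughly lam across joint action profiles.
# Sanity check of shape and cross-player payoff correlation.
G_test = generate_reward_matrix(3, 10, 0.5)
print(G_test.shape)                                             # (3, 10, 10, 10)
print(np.corrcoef(G_test[0].ravel(), G_test[1].ravel())[0, 1])  # ≈ 0.5 up to sampling noise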
def best_response_dynamics(G, A=None):
    """
    Simulate simultaneous Best Response Dynamics (SBRD) in a P-player game.

    Parameters:
    - G (np.ndarray): Reward tensor of shape (P, A, ..., A). G[i, a_1, ..., a_P] gives the
      payoff to player i when the players choose actions a_1, ..., a_P respectively.
    - A (int, optional): Restrict every player to its first A actions (defaults to G.shape[1]).

    Returns:
    - cycle_length (int): Length of the cycle detected by SBRD (1 means a pure NE).
    - total_steps (int): Total number of steps before the cycle was detected.
    - final_payoff (float): Average payoff for player 0 over the detected cycle.
    """
    P = G.shape[0]
    if A is None:
        A = G.shape[1]

    x = tuple(0 for _ in range(P))  # Initial strategy profile
    index = 0                       # Step counter
    history = dict()                # Maps strategy profiles to step indices

    while True:
        index += 1

        # All players simultaneously best-respond to the others' current actions.
        x = tuple(np.argmax(G[tuple(i if j == 0 else x[j-1] if j-1 != i else slice(0, A) for j in range(P+1))]) for i in range(P))

        if x in history:
            cycle_length = index - history[x]

            if cycle_length == 1:
                # Fixed point: a pure Nash equilibrium.
                return cycle_length, index, G[tuple(0 if i == 0 else x[i-1] for i in range(P+1))]

            # General cycle detected: compute the average payoff for player 0 over the cycle.
            cycle = [x]
            avg = G[tuple(0 if i == 0 else x[i-1] for i in range(P+1))]

            for _ in range(cycle_length - 1):
                x = tuple(np.argmax(G[tuple(i if j == 0 else x[j-1] if j-1 != i else slice(0, A) for j in range(P+1))]) for i in range(P))
                cycle.append(x)
                avg += G[tuple(0 if i == 0 else x[i-1] for i in range(P+1))]

            return cycle_length, index, avg / cycle_length

        # Log the current profile and continue.
        history[x] = index
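As a small illustration (a hand-built game; G_mp is an illustrative name), matching pennies has no pure Nash equilibrium, so simultaneous best responses never settle and the function must report a cycle of length greater than 1.
# Matching pennies: player 0 wants to match, player 1 wants to mismatch.
G_mp = np.array([[[1., -1.], [-1., 1.]],    # payoffs to player 0
                 [[-1., 1.], [1., -1.]]])   # payoffs to player 1
cycle_len, steps, avg_val = best_response_dynamics(G_mp)
print(cycle_len, steps, avg_val)  # cycle_len > 1: no pure NE in this game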
def new_SPGD(G, eta=1.0, max_iter=50_000, T_max=10):
    # Generalisation of gradient_ascent_dynamics to P players: softmax (policy)
    # gradient ascent on the reward tensor G of shape (P, A, ..., A). Takes a
    # learning rate eta, a max iteration count, and a wall-clock budget T_max.
    # Returns the number of iterations, the average payoff, and the final payoff
    # for player 0.
    t_0 = time()
    P, A = G.shape[0:2]

    # Initialise each player's "logit" vector (mapped to a mixed strategy via softmax).
    R = tuple(np.zeros(A) for _ in range(P))

    total_payoff = 0.0
    for it in range(max_iter):
        # Convert logits to mixed strategies.
        Softmax = tuple(softmax(R[i]) for i in range(P))

        # Expected payoff of each pure action for each player:
        # Q[p][i] = sum over the others' actions of prod_{j != p} Softmax[j][a_j] * G[p][a_1, ..., i, ..., a_P].
        # Fixing player p's i-th action leaves a (P-1)-dimensional tensor; matmul
        # contracts its last axis, so the other players' strategies are folded in
        # in reverse player order.
        Q = tuple(                                   # Q[0], ..., Q[P-1], each a length-A vector
            np.array([
                reduce(matmul,
                       tuple(Softmax[j] for j in reversed(range(P)) if j != p),
                       np.take(G[p], i, axis=p))     # fix the i-th action of player p
                for i in range(A)])
            for p in range(P))

        # Compute the overall expected payoff for each player.
        F = tuple(np.dot(Softmax[i], Q[i]) for i in range(P))

        # Accumulate payoff for averaging.
        total_payoff += F[0]

        # Every 100 iterations, check whether the current profile is an epsilon-NE
        # (the same multiplicative 1% criterion as gradient_ascent_dynamics) or the
        # time budget is exhausted.
        if it % 100 == 0:
            best = tuple(np.max(Q[i]) for i in range(P))
            if all(F[i] >= 0.99 * best[i] for i in range(P)) or (time() - t_0 > T_max):
                break

        grad = tuple(Q[i] - F[i] for i in range(P))

        # Update the logits with a simple gradient ascent step.
        R = tuple(R[i] + eta * grad[i] for i in range(P))

    avg_payoff = total_payoff / max(it, 1)  # guard against a break at it == 0
    return it, avg_payoff, F[0]
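A minimal consistency check (a sketch; G_small and the seed are illustrative): on the same 3-player game, the generic implementation should essentially reproduce the 3-player-specific one, since both follow the same deterministic update, up to floating-point round-off.
# Smoke test: compare the generic and the 3-player-specific implementations.
np.random.seed(7)                    # illustrative seed
G_small = generate_reward_matrix(3, 5, 0.9)
print(gradient_ascent_dynamics(G_small, 5))
print(new_SPGD(G_small))             # should match the line above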
Main Results
Section 1. Finding: 2-player random potential games converge fast to a two-cycle
# Number of simulations per (λ, N) setting
samples = 10_000
# λ grid from 0 to just under 1 (λ = 1 would make the correlation matrix singular
# and break the Cholesky factorisation).
lambda_vals = [0.09999999999 * 0.5 * i for i in range(21)]
A_vals = [50]

SBRD_S1_twoCycles = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool)
SBRD_S1_nrSteps = np.zeros((len(lambda_vals), len(A_vals), samples))
np.random.seed(2025)  # Set the random seed for reproducibility
T_0 = time()          # Start timer for the entire experiment
max_A = max(A_vals)
for count_lam, lam in enumerate(lambda_vals):  # Loop over each lambda value
    for i in range(samples):                   # Repeat the experiment for statistical robustness
        # Generate a payoff tensor with max_A actions and correlation lambda
        G = generate_reward_matrix(2, max_A, lam)

        for count_A, A in enumerate(A_vals):   # Restrict to fewer actions if needed
            # Run Best Response Dynamics
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            SBRD_S1_twoCycles[count_lam, count_A, i] = (SBRD_len == 2)
            SBRD_S1_nrSteps[count_lam, count_A, i] = SBRD_it

# Final time report
Total_time = time() - T_0
print(Total_time)
22.372665882110596
="SBRD", varName="S1_twoCycles", title="", mean_name="A", ylabel="Prob. conver. to two-cycle", axis=1, binomial=True, legend_loc="upper left") plot_single_algorithm(alg_name
="SBRD", varName="S1_nrSteps", mean_name="nrSteps", title="", ylabel="Steps", axis=1, isPositive=True, log=False, legend_loc="upper right") plot_single_algorithm(alg_name
# Smallest λ at which the empirical two-cycle probability exceeds 0.5 and 0.9.
print(round(lambda_vals[int(np.argmax(np.mean(SBRD_S1_twoCycles, axis=-1) > 0.5))], 2))
print(round(lambda_vals[int(np.argmax(np.mean(SBRD_S1_twoCycles, axis=-1) > 0.9))], 2))
0.55
0.9
Section 2. Finding: 3-player random potential games converge fast to a NE
# Number of simulations per (λ, N) setting
samples = 10_000
lambda_vals = [0.09999999999 * 0.5 * i for i in range(21)]  # λ grid from 0 to just under 1
A_vals = [50]

SBRD_S2_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool)
SBRD_S2_nrSteps = np.zeros((len(lambda_vals), len(A_vals), samples))
np.random.seed(2025)  # Set the random seed for reproducibility
T_0 = time()          # Start timer for the entire experiment
max_A = max(A_vals)
for count_lam, lam in enumerate(lambda_vals):  # Loop over each lambda value
    for i in range(samples):                   # Repeat the experiment for statistical robustness
        # Generate a payoff tensor with max_A actions and correlation lambda
        G = generate_reward_matrix(3, max_A, lam)

        for count_A, A in enumerate(A_vals):   # Restrict to fewer actions if needed
            # Run Best Response Dynamics
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            SBRD_S2_isNash[count_lam, count_A, i] = (SBRD_len == 1)
            SBRD_S2_nrSteps[count_lam, count_A, i] = SBRD_it

# Final time report
Total_time = time() - T_0
print(Total_time)
1235.6397669315338
="SBRD", varName="S2_isNash", title="", mean_name="isNash", ylabel="Prob. conver. to NE", axis=1, binomial=True, legend_loc="upper left") plot_single_algorithm(alg_name
="SBRD", varName="S2_nrSteps", mean_name="nrSteps", title="", ylabel="Steps", axis=1, isPositive=True, log=False, legend_loc="upper right") plot_single_algorithm(alg_name
Section 3. Finding: comparison of SBRD and SPGD in 3-player random potential games
# Experimental setup
samples = 1000
lambda_vals = [0.85 + 0.09999999999 * 0.25 * i for i in range(7)]  # λ values to test, from 0.85 to just under 1
A_vals = [50]

# Outputs to track
## Policy Gradient Dynamics
PGD_S3_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool)  # Indicator: did PGD reach an epsilon-NE?
PGD_S3_numIte = np.zeros((len(lambda_vals), len(A_vals), samples))  # Number of iterations of PGD
PGD_S3_valFin = np.zeros((len(lambda_vals), len(A_vals), samples))  # Final payoff for player 0 under PGD
PGD_S3_valAvg = np.zeros((len(lambda_vals), len(A_vals), samples))  # Average payoff for player 0 under PGD
PGD_S3_time = np.zeros((len(lambda_vals), len(A_vals), samples))    # Runtime of PGD

## Best Response Dynamics
SBRD_S3_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool)  # Indicator: did SBRD reach a pure NE?
SBRD_S3_numIte = np.zeros((len(lambda_vals), len(A_vals), samples))  # Number of iterations of SBRD
SBRD_S3_val = np.zeros((len(lambda_vals), len(A_vals), samples))     # Final payoff for player 0 under SBRD
SBRD_S3_time = np.zeros((len(lambda_vals), len(A_vals), samples))    # Runtime of SBRD
max_A = A_vals[-1]     # Maximum number of actions (fixed at 50)
np.random.seed(2025)   # Set the random seed for reproducibility

T_0 = time()           # Start the global timer

for count_lam, lam in enumerate(lambda_vals):  # Loop over correlation values
    for i in range(samples):                   # Loop over sample repetitions
        # Generate a shared reward matrix for both algorithms
        G = generate_reward_matrix(3, max_A, lam)

        for count_A, A in enumerate(A_vals):   # Loop over action-set sizes (fixed here)
            # === Policy Gradient Dynamics (PGD) ===
            t_0 = time()                       # Start timer for PGD
            PGD_it, PGD_avg_val, PGD_val = gradient_ascent_dynamics(G, A)
            t = time() - t_0                   # Elapsed time

            # Store PGD results
            PGD_S3_isNash[count_lam, count_A, i] = (PGD_it < 50_000 - 1)  # Stopped before max_iter?
            PGD_S3_numIte[count_lam, count_A, i] = PGD_it
            PGD_S3_valFin[count_lam, count_A, i] = PGD_val
            PGD_S3_valAvg[count_lam, count_A, i] = PGD_avg_val
            PGD_S3_time[count_lam, count_A, i] = t

            # === Best Response Dynamics (SBRD) ===
            t_0 = time()                       # Start timer for SBRD
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            t = time() - t_0                   # Elapsed time

            # Store SBRD results
            SBRD_S3_isNash[count_lam, count_A, i] = (SBRD_len == 1)  # Converged to a pure NE?
            SBRD_S3_numIte[count_lam, count_A, i] = SBRD_it
            SBRD_S3_val[count_lam, count_A, i] = SBRD_val_curr
            SBRD_S3_time[count_lam, count_A, i] = t
# === Time Summary ===
Tot_time = time() - T_0
Tot_time_SBRD = np.sum(SBRD_S3_time)
Tot_time_PGD = np.sum(PGD_S3_time)
Tot_time_gener = Tot_time - Tot_time_SBRD - Tot_time_PGD

# Print runtime breakdown
print(f"Total time to run: {round(Tot_time, 1)} s")
print(f"Percentage of the time spent on random generation of matrix: {round((Tot_time_gener / Tot_time) * 100, 1)}%")
print(f"Percentage of the time spent on SBRD algorithm: {round((Tot_time_SBRD / Tot_time) * 100, 1)}%")
print(f"Percentage of the time spent on PGD algorithm: {round((Tot_time_PGD / Tot_time) * 100, 1)}%")
Total time to run: 4802.2 s
Percentage of the time spent on random generation of matrix: 0.7%
Percentage of the time spent on SBRD algorithm: 0.1%
Percentage of the time spent on PGD algorithm: 99.2%
="S3_time", SBRD_varName="S3_time", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Seconds", axis=1, variance=True, isPositive=True, log=True, legend_loc="upper right") compare_algorithms(PGD_varName
="S3_valFin", SBRD_varName="S3_val", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Value", axis=1, variance=True, isPositive=True, only_if_conv=True, legend_loc="upper left") compare_algorithms(PGD_varName
="S3_numIte", SBRD_varName="S3_numIte", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="SSteps", axis=1, variance=True, isPositive=True, log=True, legend_loc="upper right") compare_algorithms(PGD_varName
="S3_valAvg", SBRD_varName="S3_val", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Value", axis=1, variance=True, isPositive=True, legend_loc="upper right") compare_algorithms(PGD_varName
Appendix
Section A. The results of Section 1 also hold for more actions
# Number of simulations per (λ, N) setting
samples = 1_000
lambda_vals = [0.09999999999 * 0.5 * i for i in range(21)]  # λ grid from 0 to just under 1
A_vals = [500]

SBRD_S4_twoCycles = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool)
SBRD_S4_nrSteps = np.zeros((len(lambda_vals), len(A_vals), samples))
np.random.seed(2025)  # Set the random seed for reproducibility
T_0 = time()          # Start timer for the entire experiment
max_A = max(A_vals)
for count_lam, lam in enumerate(lambda_vals):  # Loop over each lambda value
    for i in range(samples):                   # Repeat the experiment for statistical robustness
        # Generate a payoff tensor with max_A actions and correlation lambda
        G = generate_reward_matrix(2, max_A, lam)

        for count_A, A in enumerate(A_vals):   # Restrict to fewer actions if needed
            # Run Best Response Dynamics
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            SBRD_S4_twoCycles[count_lam, count_A, i] = (SBRD_len == 2)
            SBRD_S4_nrSteps[count_lam, count_A, i] = SBRD_it

# Final time report
Total_time = time() - T_0
print(Total_time)
127.52515482902527
="SBRD", varName="S4_twoCycles", title="", mean_name="twoCycle", ylabel="Prob. conver. to two-cycle", axis=1, binomial=True, legend_loc="upper left") plot_single_algorithm(alg_name
="SBRD", varName="S4_nrSteps", mean_name="nrSteps", title="", ylabel="Steps", axis=1, isPositive=True, log=False, legend_loc="upper right") plot_single_algorithm(alg_name
# Smallest λ at which the empirical two-cycle probability exceeds 0.5 and 0.9.
print(round(lambda_vals[int(np.argmax(np.mean(SBRD_S4_twoCycles, axis=-1) > 0.5))], 2))
print(round(lambda_vals[int(np.argmax(np.mean(SBRD_S4_twoCycles, axis=-1) > 0.9))], 2))
0.5
0.75
Section B.1. The results of Section 2 also hold for more actions
# Number of simulations per (λ, N) setting
samples = 1000
lambda_vals = [0.09999999999 * 0.5 * i for i in range(21)]  # λ grid from 0 to just under 1
A_vals = [100]

SBRD_S5_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool)
SBRD_S5_nrSteps = np.zeros((len(lambda_vals), len(A_vals), samples))
np.random.seed(2025)  # Set the random seed for reproducibility
T_0 = time()          # Start timer for the entire experiment
max_A = max(A_vals)
for count_lam, lam in enumerate(lambda_vals):  # Loop over each lambda value
    for i in range(samples):                   # Repeat the experiment for statistical robustness
        # Generate a payoff tensor with max_A actions and correlation lambda
        G = generate_reward_matrix(3, max_A, lam)

        for count_A, A in enumerate(A_vals):   # Restrict to fewer actions if needed
            # Run Best Response Dynamics
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            SBRD_S5_isNash[count_lam, count_A, i] = (SBRD_len == 1)
            SBRD_S5_nrSteps[count_lam, count_A, i] = SBRD_it

# Final time report
Total_time = time() - T_0
print(Total_time)
860.9745609760284
="SBRD", varName="S5_isNash", title="", mean_name="isNash", ylabel="Prob. conver. to NE", axis=1, binomial=True, legend_loc="upper left") plot_single_algorithm(alg_name
="SBRD", varName="S5_nrSteps", mean_name="nrSteps", title="", ylabel="Steps", axis=1, isPositive=True, log=False, legend_loc="upper right") plot_single_algorithm(alg_name
Section B.2. The results of Section 2 also hold for more players
# Number of simulations per (λ, N) setting
samples = 1_000
lambda_vals = [0.09999999999 * 0.5 * i for i in range(21)]  # λ grid from 0 to just under 1
A_vals = [50]

SBRD_S6_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool)
SBRD_S6_nrSteps = np.zeros((len(lambda_vals), len(A_vals), samples))
np.random.seed(2025)  # Set the random seed for reproducibility
T_0 = time()          # Start timer for the entire experiment
max_A = max(A_vals)
for count_lam, lam in enumerate(lambda_vals):  # Loop over each lambda value
    for i in range(samples):                   # Repeat the experiment for statistical robustness
        # Generate a 4-player payoff tensor with max_A actions and correlation lambda
        G = generate_reward_matrix(4, max_A, lam)

        for count_A, A in enumerate(A_vals):   # Restrict to fewer actions if needed
            # Run Best Response Dynamics
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            SBRD_S6_isNash[count_lam, count_A, i] = (SBRD_len == 1)
            SBRD_S6_nrSteps[count_lam, count_A, i] = SBRD_it

# Final time report
Total_time = time() - T_0
print(Total_time)
8539.04519701004
="SBRD", varName="S6_isNash", title="", mean_name="isNash", ylabel="Prob. conver. to NE", axis=1, binomial=True, legend_loc="upper left") plot_single_algorithm(alg_name
="SBRD", varName="S6_nrSteps", mean_name="nrSteps", title="", ylabel="Steps", axis=1, isPositive=True, log=False, legend_loc="upper right") plot_single_algorithm(alg_name
Section C.1. The results of Section 3 also hold for more actions
# Experimental setup
samples = 1000
lambda_vals = [0.85 + 0.09999999999 * 0.25 * i for i in range(7)]  # λ values to test, from 0.85 to just under 1
A_vals = [100]

# Outputs to track
## Policy Gradient Dynamics
PGD_S7_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool)  # Indicator: did PGD reach an epsilon-NE?
PGD_S7_numIte = np.zeros((len(lambda_vals), len(A_vals), samples))  # Number of iterations of PGD
PGD_S7_valFin = np.zeros((len(lambda_vals), len(A_vals), samples))  # Final payoff for player 0 under PGD
PGD_S7_valAvg = np.zeros((len(lambda_vals), len(A_vals), samples))  # Average payoff for player 0 under PGD
PGD_S7_time = np.zeros((len(lambda_vals), len(A_vals), samples))    # Runtime of PGD

## Best Response Dynamics
SBRD_S7_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool)  # Indicator: did SBRD reach a pure NE?
SBRD_S7_numIte = np.zeros((len(lambda_vals), len(A_vals), samples))  # Number of iterations of SBRD
SBRD_S7_val = np.zeros((len(lambda_vals), len(A_vals), samples))     # Final payoff for player 0 under SBRD
SBRD_S7_time = np.zeros((len(lambda_vals), len(A_vals), samples))    # Runtime of SBRD
max_A = A_vals[-1]     # Maximum number of actions (fixed at 100)
np.random.seed(2025)   # Set the random seed for reproducibility

T_0 = time()           # Start the global timer

for count_lam, lam in enumerate(lambda_vals):  # Loop over correlation values
    for i in range(samples):                   # Loop over sample repetitions
        # Generate a shared reward matrix for both algorithms
        G = generate_reward_matrix(3, max_A, lam)

        for count_A, A in enumerate(A_vals):   # Loop over action-set sizes (fixed here)
            # === Policy Gradient Dynamics (PGD) ===
            t_0 = time()                       # Start timer for PGD
            PGD_it, PGD_avg_val, PGD_val = gradient_ascent_dynamics(G, A)
            t = time() - t_0                   # Elapsed time

            # Store PGD results
            PGD_S7_isNash[count_lam, count_A, i] = (PGD_it < 50_000 - 1)  # Stopped before max_iter?
            PGD_S7_numIte[count_lam, count_A, i] = PGD_it
            PGD_S7_valFin[count_lam, count_A, i] = PGD_val
            PGD_S7_valAvg[count_lam, count_A, i] = PGD_avg_val
            PGD_S7_time[count_lam, count_A, i] = t

            # === Best Response Dynamics (SBRD) ===
            t_0 = time()                       # Start timer for SBRD
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            t = time() - t_0                   # Elapsed time

            # Store SBRD results
            SBRD_S7_isNash[count_lam, count_A, i] = (SBRD_len == 1)  # Converged to a pure NE?
            SBRD_S7_numIte[count_lam, count_A, i] = SBRD_it
            SBRD_S7_val[count_lam, count_A, i] = SBRD_val_curr
            SBRD_S7_time[count_lam, count_A, i] = t
# === Time Summary ===
Tot_time = time() - T_0
Tot_time_SBRD = np.sum(SBRD_S7_time)
Tot_time_PGD = np.sum(PGD_S7_time)
Tot_time_gener = Tot_time - Tot_time_SBRD - Tot_time_PGD

# Print runtime breakdown
print(f"Total time to run: {round(Tot_time, 1)} s")
print(f"Percentage of the time spent on random generation of matrix: {round((Tot_time_gener / Tot_time) * 100, 1)}%")
print(f"Percentage of the time spent on SBRD algorithm: {round((Tot_time_SBRD / Tot_time) * 100, 1)}%")
print(f"Percentage of the time spent on PGD algorithm: {round((Tot_time_PGD / Tot_time) * 100, 1)}%")
Total time to run: 35111.9 s
Percentage of the time spent on random generation of matrix: 0.7%
Percentage of the time spent on SBRD algorithm: 0.0%
Percentage of the time spent on PGD algorithm: 99.3%
="S7_time", SBRD_varName="S7_time", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Seconds", axis=1, variance=True, isPositive=True, log=True, legend_loc="upper right") compare_algorithms(PGD_varName
="S7_valFin", SBRD_varName="S7_val", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Value", axis=1, variance=True, isPositive=True, only_if_conv=True, legend_loc="upper left") compare_algorithms(PGD_varName
="S7_numIte", SBRD_varName="S7_numIte", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Steps", axis=1, variance=True, isPositive=True, log=True, legend_loc="upper right") compare_algorithms(PGD_varName
="S7_valAvg", SBRD_varName="S7_val", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Value", axis=1, variance=True, isPositive=True, legend_loc="upper right") compare_algorithms(PGD_varName