Utility functions (please skip to the next section)

import numpy as np
import math
import matplotlib.pyplot as plt
from time import time
import scipy.stats as stats
from scipy.stats import beta
from functools import reduce
from operator import matmul 
def softmax(r):
    """Map a logit vector r to a mixed strategy (probability vector)."""
    exp_r = np.exp(r - np.max(r))  # subtract the max for numerical stability
    return exp_r / np.sum(exp_r)

def gradient_ascent_dynamics(G, n, eta=1.0, max_iter=50_000, T_max=10):
    """
    Softmax (policy) gradient ascent dynamics for a 3-player game with n actions.

    Parameters:
    - G (np.ndarray): payoff tensor of shape (3, n, n, n).
    - n (int): number of actions per player.
    - eta (float): learning rate.
    - max_iter (int): maximum number of iterations.
    - T_max (float): wall-clock time limit in seconds.

    Returns:
    - the number of iterations run, player 0's average payoff across iterations,
      and player 0's final expected payoff.
    """
    t_0 = time()
    # Initialise each player's "logit" vector (which will be mapped to a mixed strategy via softmax).
    r0 = np.zeros(n)
    r1 = np.zeros(n)
    r2 = np.zeros(n)
    
    total_payoff = 0.0
    
    for it in range(max_iter):
        # Convert logits to mixed strategies.
        p0 = softmax(r0)
        p1 = softmax(r1)
        p2 = softmax(r2)

        
        # Compute the expected payoff for each pure action for each player.
        # For player 0: Q0[i] = sum_{j,k} p1[j] p2[k] G[0][i, j, k]
        Q0 = np.array([np.dot(p1, np.dot(G[0][i, :n, :n], p2)) for i in range(n)])
        # For player 1: Q1[i] = sum_{j,k} p0[j] p2[k] G[1][j, i, k]
        Q1 = np.array([np.dot(p0, np.dot(G[1][:n, i, :n], p2)) for i in range(n)])
        # For player 2: Q2[i] = sum_{j,k} p0[j] p1[k] G[2][j, k, i]
        Q2 = np.array([np.dot(p0, np.dot(G[2][:n, :n, i], p1)) for i in range(n)])
        
        # Compute the overall expected payoff for each player.
        f0 = np.dot(p0, Q0)
        f1 = np.dot(p1, Q1)
        f2 = np.dot(p2, Q2)
        
        # Accumulate payoff for averaging.
        total_payoff += f0

        # Every 100 iterations, check whether each player's expected payoff is
        # within 1% of its best-response payoff (a multiplicative epsilon-NE
        # test), or whether the time budget T_max has been exhausted.
        if it % 100 == 0:
            best0 = np.max(Q0)
            best1 = np.max(Q1)
            best2 = np.max(Q2)
            if ((f0 >= 0.99 * best0) and (f1 >= 0.99 * best1) and (f2 >= 0.99 * best2)) or (time()-t_0 > T_max):
                break


        grad_r0 = Q0 - f0
        grad_r1 = Q1 - f1
        grad_r2 = Q2 - f2
        
        # Update the logits using a simple gradient ascent step.
        r0 += eta * grad_r0 
        r1 += eta * grad_r1 
        r2 += eta * grad_r2 
    
    avg_payoff = total_payoff / (it + 1)  # payoff was accumulated over it + 1 iterations (also avoids division by zero)
    return it, avg_payoff, f0


# Hyperparameters for gradient ascent, selected by trial and error; they gave the best results for this algorithm and match the defaults of gradient_ascent_dynamics.
step_size = 1.0
max_iterations = 50_000
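
A minimal usage sketch (not part of the experiments): run the gradient dynamics with the hyperparameters above on a freshly sampled game. This assumes generate_reward_matrix (defined in the "Dynamics definitions" section below) has already been run; the game size here is arbitrary.

G_example = generate_reward_matrix(3, 10, 0.9)
iterations, avg_payoff, final_payoff = gradient_ascent_dynamics(G_example, 10, eta=step_size, max_iter=max_iterations)
print(iterations, avg_payoff, final_payoff)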

These are utility functions used only for plotting

def clopper_pearson_interval(k, n, alpha=0.005):
    """Exact Clopper-Pearson confidence interval, at level 1 - alpha, for a binomial proportion given k successes out of n trials."""
    lower = beta.ppf(alpha / 2, k, n - k + 1) if k > 0 else 0.0
    upper = beta.ppf(1 - alpha / 2, k + 1, n - k) if k < n else 1.0
    return lower, upper
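
For example (illustrative numbers only): the 99.5% interval for 45 successes out of 100 trials is roughly [0.31, 0.59].

lo, hi = clopper_pearson_interval(45, 100)
print(f"99.5% CI: [{lo:.3f}, {hi:.3f}]")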

def plot_single_algorithm(alg_name="SBRD", varName="time", ylabel=None, axis=0, mean_name=None, variance=True, only_if_conv=False, log=False, n_cols=1, number_of_plots=-1, binomial=False, isPositive=False, width=6, legend_loc = "upper left", title=None):
    """
    Plot runtimes of a single algorithm (PGD or SBRD) in subplots, comparing across one parameter.

    Parameters:
    - alg_name: 'PGD' or 'SBRD'
    - varName: variable name suffix for accessing data (e.g., "Seconds" accesses PGD_Seconds or SBRD_Seconds)
    - axis: 0 or 1
        - 0: compare across N for each Lambda (row-wise)
        - 1: compare across Lambda for each N (column-wise)
    - variance: whether to include standard deviation shading
    - only_if_conv: whether to only include data when convergence was achieved (using *_isNash)
    - log: whether to plot on a log scale
    - n_cols: number of columns of subplots to create
    - number_of_plots: number of subplots to show (-1 for all)
    - lambda_vals: optional list/array of Lambda values
    - A_vals: optional list/array of N values
    - binomial: whether the data is binomial (0-1 values)
    """

    if ylabel is None:
        ylabel = varName

    # If only_if_conv is True, keep only the samples for which the algorithm converged.
    data = globals()[f"{alg_name}_{varName}"]
    if only_if_conv:
        # The convergence flags share the section prefix of varName (e.g. "S2_nrSteps" -> "SBRD_S2_isNash").
        isNash = globals()[f"{alg_name}_{varName.split('_')[0]}_isNash"]
        masked_data = np.where(isNash, data, np.nan)
    else:
        masked_data = data

    """
    Creates the matrix of means and the matrices needed for the Confidence Intervals.
    If the data is Binary, the CI is done using the Clopper Pearson interval. If the data is real, we use the Standard Error
    """

    mean_data = np.nanmean(masked_data, axis=-1)
    if variance:
        n_data = np.sum(~np.isnan(masked_data), axis=-1)
        if binomial:
            sum_data = np.nansum(masked_data, axis=-1)
            lower_bounds = np.zeros_like(mean_data)
            upper_bounds = np.ones_like(mean_data)
            for i in range(mean_data.shape[0]):
                for j in range(mean_data.shape[1]):
                    if n_data[i, j] > 0:
                        lower, upper = clopper_pearson_interval(int(sum_data[i, j]), int(n_data[i, j]))
                        lower_bounds[i, j] = lower
                        upper_bounds[i, j] = upper
        else:
            SE_data = np.sqrt(np.divide(np.nanvar(masked_data, axis=-1), n_data))

    L, N = mean_data.shape

    """ 
    Depending on wether we want the plots to be done along the 0 or 1 axis, we get different "get_mean", "get_lower" and "get_upper"
    functions, that take as input i and return the right vector (respectively the mean, lower and upper CI points) with that index.
    """
    if axis == 0:
        num_plots = L
        x_vals = A_vals 
        y_vals = lambda_vals 
        get_mean = lambda i: mean_data[i, :]
        if variance:
            if binomial:
                get_lower = lambda i: lower_bounds[i, :]
                get_upper = lambda i: upper_bounds[i, :]
            else:
                get_var = lambda i: SE_data[i, :]
        label = "Correlation λ"
        xlabel = "Nr. actions"
    elif axis == 1:
        num_plots = N
        x_vals = lambda_vals
        y_vals = A_vals 
        get_mean = lambda i: mean_data[:, i]
        if variance:
            if binomial:
                get_lower = lambda i: lower_bounds[:, i]
                get_upper = lambda i: upper_bounds[:, i]
            else:
                get_var = lambda i: SE_data[:, i]
        label = "Nr. actions"
        xlabel = "Correlation λ"
    else:
        raise ValueError("Axis must be 0 (over Lambda) or 1 (over N)")
    


    """
    We now create the structure for the plots
    """
    if number_of_plots == -1 or number_of_plots >= num_plots:
        indices = list(range(num_plots))
    else:
        indices = np.linspace(0, num_plots - 1, number_of_plots, dtype=int)

    n_rows = math.ceil(len(indices) / n_cols)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(width * n_cols, 3 * n_rows), sharex=True)
    axes = np.array(axes).flatten()

    color = (0.1, 0.5, 0.8) if alg_name == "PGD" else (0.9, 0.4, 0.2)

    """
    We finally plot the various graphs
    """

    for i, idx in enumerate(indices):
        ax = axes[i]
        ax.set_xlabel(xlabel)
        if log:
            ax.set_yscale('log')
            ydata = get_mean(idx)
            ymin = np.nanmin(ydata)
            ymax = np.nanmax(ydata)

            # Round to nearest powers of 10
            lower = 10 ** np.floor(np.log10(ymin))
            upper = 10 ** np.ceil(np.log10(ymax))
            ax.set_ylim(lower, upper)
        ax.plot(x_vals, get_mean(idx), color=color, label=mean_name if mean_name is not None else varName)
        if variance:
            if binomial:
                ax.fill_between(x_vals, get_lower(idx), get_upper(idx), color=color, alpha=0.2, label='99.5% CI')
            else:
                lower = get_mean(idx) - 2 * get_var(idx)
                upper = get_mean(idx) + 2 * get_var(idx)
                if isPositive:
                    # Clip the band at 0 for quantities that cannot be negative.
                    lower = np.clip(lower, 0, None)
                    upper = np.clip(upper, 0, None)
                ax.fill_between(x_vals, lower, upper, color=color, alpha=0.2, label='±2 SE')
        ax.set_ylabel(ylabel)
        if title is None:
            ax.set_title(f"{label} = {round(y_vals[idx], 2)}, samples = {samples}")
        else:
            ax.set_title(title)
        ax.legend(loc=legend_loc)
        ax.grid(True)

    for j in range(len(indices), len(axes)):
        fig.delaxes(axes[j])

    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.savefig(f"{alg_name}_{varName}_samples={samples} axis={axis}.pdf")
    plt.show()



def compare_algorithms(PGD_varName="time", SBRD_varName="time", yLabel=None, axis=0, mean_1_name=None, mean_2_name=None, title=None, variance=True, only_if_conv=False, log=False, n_cols=1, number_of_plots=-1, binomial=False, width = 6, isPositive=False, legend_loc="upper left"):
    """
    Plot runtimes of PGD and SBRD algorithms in subplots, comparing them across one parameter.

    Parameters:
    - PGD_varName, SBRD_varName: variable name suffix for accessing data (e.g., "time" accesses PGD_time and SBRD_time)
    - axis: 0 or 1
        - 0: compare across N for each Lambda (row-wise)
        - 1: compare across Lambda for each N (column-wise)
    - variance: whether to include standard deviation shading
    - only_if_conv: whether to only include data when convergence was achieved (using PGD_isNash, SBRD_isNash)
    - log: whether to plot on a log scale
    - n_cols: number of columns of subplots to create
    - lambda_vals: global list/array of Lambda values
    - A_vals: global list/array of N values
    - number_of_plots: number of subplots to display (equally spaced). If -1, show all.
    - binomial: whether the data is binomial (0-1)
    - isPositive: whether to clip the lower bound at 0
    """
    if yLabel is None:
        yLabel = PGD_varName

    def process_data(alg_name, varName):
        data = globals()[f"{alg_name}_{varName}"]
        if only_if_conv:
            # The convergence flags share the section prefix of varName (e.g. "S3_valFin" -> "PGD_S3_isNash").
            isNash = globals()[f"{alg_name}_{varName.split('_')[0]}_isNash"]
            data = np.where(isNash, data, np.nan)
        mean_data = np.nanmean(data, axis=-1)
        n_data = np.sum(~np.isnan(data), axis=-1)

        if not variance:
            return mean_data, None, None

        if binomial:
            sum_data = np.nansum(data, axis=-1)
            lower_bounds = np.zeros_like(mean_data)
            upper_bounds = np.ones_like(mean_data)
            for i in range(mean_data.shape[0]):
                for j in range(mean_data.shape[1]):
                    if n_data[i, j] > 0:
                        lower, upper = clopper_pearson_interval(int(sum_data[i, j]), int(n_data[i, j]))
                        lower_bounds[i, j] = lower
                        upper_bounds[i, j] = upper
            return mean_data, lower_bounds, upper_bounds
        else:
            SE = np.sqrt(np.nanvar(data, axis=-1) / n_data)
            lower_bounds = mean_data - 2 * SE
            upper_bounds = mean_data + 2 * SE
            return mean_data, lower_bounds, upper_bounds

    PGD_mean, PGD_lower, PGD_upper = process_data("PGD", PGD_varName)
    SBRD_mean, SBRD_lower, SBRD_upper = process_data("SBRD", SBRD_varName)

    L, N = PGD_mean.shape
    if axis == 0:
        num_plots = L
        x_vals = A_vals
        y_vals = lambda_vals
        get_slice = lambda data, i: data[i, :]
        label = "Correlation λ"
        xlabel = "Nr. actions"
    elif axis == 1:
        num_plots = N
        x_vals = lambda_vals
        y_vals = A_vals
        get_slice = lambda data, i: data[:, i]
        label = "Nr. actions"
        xlabel = "Correlation λ"
    else:
        raise ValueError("Axis must be 0 (over Lambda) or 1 (over N)")

    indices = range(num_plots) if number_of_plots == -1 or number_of_plots >= num_plots else np.linspace(0, num_plots - 1, number_of_plots, dtype=int)
    n_rows = math.ceil(len(indices) / n_cols)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(width * n_cols, 3 * n_rows), sharex=True)
    axes = np.array(axes).flatten()

    colors = {"PGD": (0.1, 0.5, 0.8), "SBRD": (0.9, 0.4, 0.2)}

    for i, idx in enumerate(indices):
        ax = axes[i]
        ax.set_xlabel(xlabel)
        if log:
            ax.set_yscale('log')

        for name, mean, lower, upper, color in [
            ("PGD", PGD_mean, PGD_lower, PGD_upper, colors["PGD"]),
            ("SBRD", SBRD_mean, SBRD_lower, SBRD_upper, colors["SBRD"])
        ]:
            y = get_slice(mean, idx)
            curr_label = mean_1_name if name == "PGD" else mean_2_name
            ax.plot(x_vals, y, label=curr_label, color=color)
            if variance and lower is not None and upper is not None:
                y_lower = get_slice(lower, idx)
                y_upper = get_slice(upper, idx)
                if isPositive:
                    y_lower = np.clip(y_lower, 0, None)
                    y_upper = np.clip(y_upper, 0, None)
                #label_ci = "99.5% CI" if binomial else "±2SE"
                ax.fill_between(x_vals, y_lower, y_upper, color=color, alpha=0.2)#, label=f"{label_ci}")

        ax.set_ylabel(yLabel)
        if title is None:
            ax.set_title(f"{label} = {round(y_vals[idx], 2)}, samples = {samples}")
        else:
            ax.set_title(title)
        ax.legend(loc=legend_loc)
        ax.grid(True)

    for j in range(len(indices), len(axes)):
        fig.delaxes(axes[j])

    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.savefig(f"compare_{PGD_varName}_axis={axis}.pdf")
    plt.show()

Dynamics definitions

def generate_reward_matrix(P, A, lam):
    """
    Generate the reward tensor for a P-player game with A actions per player.

    Parameters:
    - P (int): Number of players.
    - A (int): Number of actions per player.
    - lam (float): Correlation between players' payoffs.

    Returns:
    - np.ndarray: Reward tensor G of shape (P, A, ..., A). Each entry
      G[i, a_1, ..., a_P] is the payoff for player i when players 1, ..., P
      play actions a_1, ..., a_P respectively.
    """
    # Correlation matrix across players: 1 on the diagonal, lam off-diagonal
    corr_matrix = lam * np.ones((P, P)) + (1 - lam) * np.eye(P)
    
    # Cholesky decomposition for generating correlated normal variables
    L = np.linalg.cholesky(corr_matrix)
    
    # Generate correlated payoffs for all joint action profiles
    raw_samples = np.random.randn(P, A**P)
    correlated = L @ raw_samples
    
    # Target shape (P, A, ..., A): one payoff tensor of shape (A,)*P per player
    shape = (P,) + (A,) * P

    return correlated.reshape(shape)
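
A quick sanity check (a sketch, not part of the experiments): with two players, the empirical correlation between the players' payoffs across joint action profiles should be close to lam.

G_test = generate_reward_matrix(2, 200, 0.5)
print(np.corrcoef(G_test[0].ravel(), G_test[1].ravel())[0, 1])  # ≈ 0.5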
def best_response_dynamics(G, A=None):
    """
    Simulate simultaneous best response dynamics (SBRD) in a P-player game:
    at every step, all players switch at once to a best response against the
    previous strategy profile.

    Parameters:
    - G (np.ndarray): Reward tensor of shape (P, A, ..., A). G[i, a_1, ..., a_P]
      gives the payoff to player i when players 1, ..., P choose actions
      a_1, ..., a_P respectively.
    - A (int, optional): restrict each player to their first A actions
      (defaults to the full action set of G).

    Returns:
    - cycle_length (int): Length of the cycle detected by SBRD (1 means a pure Nash equilibrium).
    - total_steps (int): Total number of steps before the cycle was detected.
    - final_payoff (float): Player 0's payoff at the equilibrium, or their average payoff over the detected cycle.
    """
    P = G.shape[0]
    if A is None:
        A = G.shape[1]

    x = tuple(0 for _ in range(P))        # Initial strategy profile
    index = 0                             # Step counter
    history = dict()                      # Maps strategy profiles to step indices

    while True:
        index += 1

        # All players simultaneously best-respond to the previous profile x.
        # In the index tuple, j == 0 selects player i's payoff tensor and
        # slice(0, A) leaves player i's own action axis free for the argmax.
        x = tuple( np.argmax( G[ tuple(i if j == 0 else x[j-1] if j-1 != i else slice(0, A) for j in range(P+1)) ] ) for i in range(P))


        if x in history:
            cycle_length = index - history[x]

            if cycle_length == 1:
                # A fixed point of the dynamics is a pure Nash equilibrium
                return cycle_length, index, G[tuple(0 if i == 0 else x[i-1] for i in range(P+1))]

            # General cycle detected: compute average payoff for player 0
            cycle = [x]
            avg = G[tuple(0 if i == 0 else x[i-1] for i in range(P+1))]

            for _ in range(cycle_length - 1):
                x = tuple( np.argmax( G[ tuple(i if j == 0 else x[j-1] if j-1 != i else slice(0, A) for j in range(P+1)) ] ) for i in range(P))
                cycle.append(x)
                avg += G[tuple(0 if i == 0 else x[i-1] for i in range(P+1))]

            return cycle_length, index, avg / cycle_length

        # Log the current profile and continue
        history[x] = index
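
A minimal usage sketch (not part of the experiments): run SBRD on a small 3-player game; a returned cycle_length of 1 means the dynamics settled on a pure Nash equilibrium. The game size and seed here are arbitrary.

np.random.seed(0)
G_small = generate_reward_matrix(3, 5, 0.9)
cycle_length, steps, value = best_response_dynamics(G_small)
print(cycle_length, steps, value)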
def new_SPGD(G, eta=1.0, max_iter=50_000, T_max=10):
    """
    Softmax (policy) gradient ascent dynamics for a general P-player game with A actions per player.

    Parameters:
    - G (np.ndarray): payoff tensor of shape (P, A, ..., A).
    - eta (float): learning rate.
    - max_iter (int): maximum number of iterations.
    - T_max (float): wall-clock time limit in seconds.

    Returns:
    - the number of iterations run, player 0's average payoff across iterations,
      and player 0's final expected payoff.
    """
    t_0 = time()
    P, A = G.shape[0:2]

    # Initialise each player's "logit" vector (mapped to a mixed strategy via softmax).
    R = tuple(np.zeros(A) for _ in range(P))

    total_payoff = 0.0
    for it in range(max_iter):
        # Convert logits to mixed strategies.
        Softmax = tuple(softmax(R[i]) for i in range(P))
        
        # Compute the expected payoff of each pure action for each player, e.g.
        # for player 0: Q[0][i] = sum_{j,k,...} p1[j] p2[k] ... G[0][i, j, k, ...].
        # einsum contracts every other player's mixed strategy against G[p],
        # leaving a vector over player p's own actions.
        letters = "abcdefghij"[:P]  # one index letter per player (assumes P <= 10)
        Q = tuple(
            np.einsum(
                letters + "," + ",".join(letters[j] for j in range(P) if j != p)
                + "->" + letters[p],
                G[p], *(Softmax[j] for j in range(P) if j != p))
            for p in range(P))

        
        # Compute the overall expected payoff for each player.
        F = tuple(np.dot(Softmax[i], Q[i]) for i in range(P))
        
        # Accumulate payoff for averaging.
        total_payoff += F[0]

        # Every 100 iterations, check whether each player's expected payoff is
        # within 1% of its best-response payoff (the same multiplicative
        # epsilon-NE test as in gradient_ascent_dynamics), or whether the time
        # budget is spent.
        if it % 100 == 0:
            best = tuple(np.max(Q[i]) for i in range(P))
            if all(F[i] >= 0.99 * best[i] for i in range(P)) or (time()-t_0 > T_max):
                break


        grad = tuple(Q[i]-F[i] for i in range(P))
        
        # Update the logits using a simple gradient ascent step.
        R = tuple(R[i]+eta*grad[i] for i in range(P))
    
    avg_payoff = total_payoff / (it + 1)  # payoff was accumulated over it + 1 iterations (also avoids division by zero)
    return it, avg_payoff, F[0]
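
A minimal usage sketch mirroring the one above, now with the P-player implementation (arbitrary game size and seed):

np.random.seed(0)
G_small = generate_reward_matrix(3, 5, 0.9)
iterations, avg_payoff, final_payoff = new_SPGD(G_small, eta=1.0, max_iter=5_000)
print(iterations, avg_payoff, final_payoff)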

Main Results

Section 1. Finding: 2-player random potential games converge fast to a two-cycle

# Number of simulations per (λ, N) setting
samples = 10_000
lambda_vals = [0.09999999999*0.5*i for i in range(21)]  # λ from 0 to just below 1 in ≈0.05 steps (λ = 1 would make the correlation matrix singular and break the Cholesky factorisation)
A_vals = [50]

SBRD_S1_twoCycles = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool)
SBRD_S1_nrSteps = np.zeros((len(lambda_vals), len(A_vals), samples))
np.random.seed(2025) #We set the random seed for reproducibility 
T_0 = time()  # Start timer for entire experiment
max_A = max(A_vals)
for count_lam, lam in enumerate(lambda_vals):  # Loop over each lambda value
    for i in range(samples):  # Repeat experiment for statistical robustness
        G = generate_reward_matrix(2, max_A, lam)  # Generate a payoff matrix with max_A actions and correlation lambda
    
        for count_A, A in enumerate(A_vals):  # Now restrict to smaller N if needed
            # Run Best Response Dynamics and time it
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            SBRD_S1_twoCycles[count_lam, count_A, i] = (SBRD_len == 2)
            SBRD_S1_nrSteps[count_lam, count_A, i] = SBRD_it

# Final time report
Total_time = time() - T_0
print(Total_time)
22.372665882110596
plot_single_algorithm(alg_name="SBRD", varName="S1_twoCycles", title="", mean_name="A", ylabel="Prob. conver. to two-cycle", axis=1, binomial=True, legend_loc="upper left")

plot_single_algorithm(alg_name="SBRD", varName="S1_nrSteps", mean_name="nrSteps", title="", ylabel="Steps", axis=1, isPositive=True, log=False, legend_loc="upper right")

# Smallest λ for which the empirical probability of converging to a two-cycle exceeds 0.5 (resp. 0.9)
print(round(lambda_vals[int(np.argmax(np.mean(SBRD_S1_twoCycles, axis=-1)>0.5))],2))
print(round(lambda_vals[int(np.argmax(np.mean(SBRD_S1_twoCycles, axis=-1)>0.9))],2))
0.55
0.9

Section 2. Finding: 3-player random potential games converge fast to an NE

# Number of simulations per (λ, N) setting
samples = 10_000
lambda_vals = [0.09999999999*0.5*i for i in range(21)]
A_vals = [50]

SBRD_S2_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool)
SBRD_S2_nrSteps = np.zeros((len(lambda_vals), len(A_vals), samples))
np.random.seed(2025) #We set the random seed for reproducibility 
T_0 = time()  # Start timer for entire experiment
max_A = max(A_vals)
for count_lam, lam in enumerate(lambda_vals):  # Loop over each lambda value
    for i in range(samples):  # Repeat experiment for statistical robustness
        G = generate_reward_matrix(3, max_A, lam)  # Generate a payoff matrix with max_A actions and correlation lambda
    
        for count_A, A in enumerate(A_vals):  # Now restrict to smaller N if needed
            # Run Best Response Dynamics and time it
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            SBRD_S2_isNash[count_lam, count_A, i] = (SBRD_len == 1)
            SBRD_S2_nrSteps[count_lam, count_A, i] = SBRD_it

# Final time report
Total_time = time() - T_0
print(Total_time)
1235.6397669315338
plot_single_algorithm(alg_name="SBRD", varName="S2_isNash", title="", mean_name="isNash", ylabel="Prob. conver. to NE", axis=1, binomial=True, legend_loc="upper left")

plot_single_algorithm(alg_name="SBRD", varName="S2_nrSteps", mean_name="nrSteps", title="", ylabel="Steps", axis=1, isPositive=True, log=False, legend_loc="upper right")

Section 3. Finding: comparison of SBRD and SPGD in 3-player random potential games

# Experimental setup
samples = 1000
lambda_vals = [0.85+0.09999999999*0.25*i for i in range(7)] # λ values to test: from 0.85 to just below 1 in ≈0.025 steps
A_vals = [50]

# Outputs to Track
## Policy Gradient Dynamics
PGD_S3_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool) # Whether Policy Gradient Dynamics converged to a Nash equilibrium
PGD_S3_numIte = np.zeros((len(lambda_vals), len(A_vals), samples)) # Number of iterations in Policy Gradient Dynamics
PGD_S3_valFin = np.zeros((len(lambda_vals), len(A_vals), samples)) # Final payoff for player 0 in Policy Gradient Dynamics
PGD_S3_valAvg = np.zeros((len(lambda_vals), len(A_vals), samples)) # Average payoff for player 0 in Policy Gradient Dynamics
PGD_S3_time = np.zeros((len(lambda_vals), len(A_vals), samples))   # Runtime in Policy Gradient Dynamics

## Best Response Dynamics
SBRD_S3_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool) # Whether Best Response Dynamics converged to a (pure) Nash equilibrium
SBRD_S3_numIte = np.zeros((len(lambda_vals), len(A_vals), samples)) # Number of iterations in Best Response Dynamics
SBRD_S3_val = np.zeros((len(lambda_vals), len(A_vals), samples)) # Final payoff for player 0 in Best Response Dynamics
SBRD_S3_time = np.zeros((len(lambda_vals), len(A_vals), samples))   # Runtime in Best Response Dynamics
max_A = A_vals[-1]  # Maximum number of actions (fixed at 50)
np.random.seed(2025) #We set the random seed for reproducibility 

T_0 = time()  # Start the global timer

for count_lam, lam in enumerate(lambda_vals):  # Loop over correlation values
    for i in range(samples):  # Loop over sample repetitions
        # Generate a shared reward matrix for both algorithms
        G = generate_reward_matrix(3, max_A, lam)

        for count_A, A in enumerate(A_vals):  # Loop over action counts (fixed here)
            t_0 = time()  # Start timer for PGD
            PGD_it, PGD_avg_val, PGD_val = gradient_ascent_dynamics(G, A)
            t = time() - t_0  # Elapsed time

            PGD_S3_isNash[count_lam, count_A, i] = (PGD_it < 50_000 - 1)  # Broke out before the iteration cap (epsilon-NE reached or time limit hit)?
            PGD_S3_numIte[count_lam, count_A, i] = PGD_it
            PGD_S3_valFin[count_lam, count_A, i] = PGD_val
            PGD_S3_valAvg[count_lam, count_A, i] = PGD_avg_val
            PGD_S3_time[count_lam, count_A, i] = t

            # === Best Response Dynamics (SBRD) ===
            t_0 = time()  # Start timer for SBRD
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            t = time() - t_0  # Elapsed time

            # Store SBRD results
            SBRD_S3_isNash[count_lam, count_A, i] = (SBRD_len == 1)  # Converged to pure NE?
            SBRD_S3_numIte[count_lam, count_A, i] = SBRD_it
            SBRD_S3_val[count_lam, count_A, i] = SBRD_val_curr
            SBRD_S3_time[count_lam, count_A, i] = t

# === Time Summary ===
Tot_time = time() - T_0
Tot_time_SBRD = np.sum(SBRD_S3_time)
Tot_time_PGD = np.sum(PGD_S3_time)
Tot_time_gener = Tot_time - Tot_time_SBRD - Tot_time_PGD

# Print runtime breakdown
print(f"Total time to run: {round(Tot_time, 1)} s")
print(f"Percentage of the time spent on random generation of matrix: {round((Tot_time_gener / Tot_time) * 100, 1)}%")
print(f"Percentage of the time spent on SBRD algorithm: {round((Tot_time_SBRD / Tot_time) * 100, 1)}%")
print(f"Percentage of the time spent on PGD algorithm: {round((Tot_time_PGD / Tot_time) * 100, 1)}%")
Total time to run: 4802.2 s
Percentage of the time spent on random generation of matrix: 0.7%
Percentage of the time spent on SBRD algorithm: 0.1%
Percentage of the time spent on PGD algorithm: 99.2%
compare_algorithms(PGD_varName="S3_time", SBRD_varName="S3_time", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Seconds", axis=1, variance=True, isPositive=True, log=True, legend_loc="upper right")
/var/folders/v1/4466w3pn1216dm8152kjp_3r0000gn/T/ipykernel_18423/3723386220.py:260: UserWarning: Data has no positive values, and therefore cannot be log-scaled.
  ax.fill_between(x_vals, y_lower, y_upper, color=color, alpha=0.2)#, label=f"{label_ci}")
/var/folders/v1/4466w3pn1216dm8152kjp_3r0000gn/T/ipykernel_18423/3723386220.py:273: UserWarning: Data has no positive values, and therefore cannot be log-scaled.
  plt.tight_layout(rect=[0, 0, 1, 0.96])

compare_algorithms(PGD_varName="S3_valFin", SBRD_varName="S3_val", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Value", axis=1, variance=True, isPositive=True, only_if_conv=True, legend_loc="upper left")

compare_algorithms(PGD_varName="S3_numIte", SBRD_varName="S3_numIte", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="SSteps", axis=1, variance=True, isPositive=True, log=True, legend_loc="upper right")

compare_algorithms(PGD_varName="S3_valAvg", SBRD_varName="S3_val", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Value", axis=1, variance=True, isPositive=True, legend_loc="upper right")

Appendix

Section A. Results of Section 1 also hold for more actions

# Number of simulations per (λ, N) setting
samples = 1_000
lambda_vals = [0.09999999999*0.5*i for i in range(21)]
A_vals = [500]

SBRD_S4_twoCycles = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool)
SBRD_S4_nrSteps = np.zeros((len(lambda_vals), len(A_vals), samples))
np.random.seed(2025) #We set the random seed for reproducibility 
T_0 = time()  # Start timer for entire experiment
max_A = max(A_vals)
for count_lam, lam in enumerate(lambda_vals):  # Loop over each lambda value
    for i in range(samples):  # Repeat experiment for statistical robustness
        G = generate_reward_matrix(2, max_A, lam)  # Generate a payoff matrix with max_A actions and correlation lambda
    
        for count_A, A in enumerate(A_vals):  # Now restrict to smaller N if needed
            # Run Best Response Dynamics and time it
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            SBRD_S4_twoCycles[count_lam, count_A, i] = (SBRD_len == 2)
            SBRD_S4_nrSteps[count_lam, count_A, i] = SBRD_it

# Final time report
Total_time = time() - T_0
print(Total_time)
127.52515482902527
plot_single_algorithm(alg_name="SBRD", varName="S4_twoCycles", title="", mean_name="twoCycle", ylabel="Prob. conver. to two-cycle", axis=1, binomial=True, legend_loc="upper left")

plot_single_algorithm(alg_name="SBRD", varName="S4_nrSteps", mean_name="nrSteps", title="", ylabel="Steps", axis=1, isPositive=True, log=False, legend_loc="upper right")

# Smallest λ for which the empirical probability of converging to a two-cycle exceeds 0.5 (resp. 0.9)
print(round(lambda_vals[int(np.argmax(np.mean(SBRD_S4_twoCycles, axis=-1)>0.5))],2))
print(round(lambda_vals[int(np.argmax(np.mean(SBRD_S4_twoCycles, axis=-1)>0.9))],2))
0.5
0.75

Section B.1. Results of Section 2 also hold for more actions

# Number of simulations per (λ, N) setting
samples = 1000
lambda_vals = [0.09999999999*0.5*i for i in range(21)]
A_vals = [100]

SBRD_S5_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool)
SBRD_S5_nrSteps = np.zeros((len(lambda_vals), len(A_vals), samples))
np.random.seed(2025) #We set the random seed for reproducibility 
T_0 = time()  # Start timer for entire experiment
max_A = max(A_vals)
for count_lam, lam in enumerate(lambda_vals):  # Loop over each lambda value
    for i in range(samples):  # Repeat experiment for statistical robustness
        G = generate_reward_matrix(3, max_A, lam)  # Generate a payoff matrix with max_A actions and correlation lambda
    
        for count_A, A in enumerate(A_vals):  # Now restrict to smaller N if needed
            # Run Best Response Dynamics and time it
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            SBRD_S5_isNash[count_lam, count_A, i] = (SBRD_len == 1)
            SBRD_S5_nrSteps[count_lam, count_A, i] = SBRD_it

# Final time report
Total_time = time() - T_0
print(Total_time)
860.9745609760284
plot_single_algorithm(alg_name="SBRD", varName="S5_isNash", title="", mean_name="isNash", ylabel="Prob. conver. to NE", axis=1, binomial=True, legend_loc="upper left")

plot_single_algorithm(alg_name="SBRD", varName="S5_nrSteps", mean_name="nrSteps", title="", ylabel="Steps", axis=1, isPositive=True, log=False, legend_loc="upper right")

Section B.2. Results of Section 2 also hold for more players

# Number of simulations per (λ, N) setting
samples = 1_000
lambda_vals = [0.09999999999*0.5*i for i in range(21)]
A_vals = [50]

SBRD_S6_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool)
SBRD_S6_nrSteps = np.zeros((len(lambda_vals), len(A_vals), samples))
np.random.seed(2025) #We set the random seed for reproducibility 
T_0 = time()  # Start timer for entire experiment
max_A = max(A_vals)
for count_lam, lam in enumerate(lambda_vals):  # Loop over each lambda value
    for i in range(samples):  # Repeat experiment for statistical robustness
        G = generate_reward_matrix(4, max_A, lam)  # Generate a payoff matrix with max_A actions and correlation lambda
    
        for count_A, A in enumerate(A_vals):  # Now restrict to smaller N if needed
            # Run Best Response Dynamics and time it
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            SBRD_S6_isNash[count_lam, count_A, i] = (SBRD_len == 1)
            SBRD_S6_nrSteps[count_lam, count_A, i] = SBRD_it

# Final time report
Total_time = time() - T_0
print(Total_time)
8539.04519701004
plot_single_algorithm(alg_name="SBRD", varName="S6_isNash", title="", mean_name="isNash", ylabel="Prob. conver. to NE", axis=1, binomial=True, legend_loc="upper left")

plot_single_algorithm(alg_name="SBRD", varName="S6_nrSteps", mean_name="nrSteps", title="", ylabel="Steps", axis=1, isPositive=True, log=False, legend_loc="upper right")

Section C.1. Results of Section 3 also hold for more actions

# Experimental setup
samples = 1000
lambda_vals = [0.85+0.09999999999*0.25*i for i in range(7)] # λ values to test: from 0.85 to just below 1 in ≈0.025 steps
A_vals = [100]

# Outputs to Track
## Policy Gradient Dynamics
PGD_S7_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool) # Whether Policy Gradient Dynamics converged to a Nash equilibrium
PGD_S7_numIte = np.zeros((len(lambda_vals), len(A_vals), samples)) # Number of iterations in Policy Gradient Dynamics
PGD_S7_valFin = np.zeros((len(lambda_vals), len(A_vals), samples)) # Final payoff for player 0 in Policy Gradient Dynamics
PGD_S7_valAvg = np.zeros((len(lambda_vals), len(A_vals), samples)) # Average payoff for player 0 in Policy Gradient Dynamics
PGD_S7_time = np.zeros((len(lambda_vals), len(A_vals), samples))   # Runtime in Policy Gradient Dynamics

## Best Response Dynamics
SBRD_S7_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool) # Whether Best Response Dynamics converged to a (pure) Nash equilibrium
SBRD_S7_numIte = np.zeros((len(lambda_vals), len(A_vals), samples)) # Number of iterations in Best Response Dynamics
SBRD_S7_val = np.zeros((len(lambda_vals), len(A_vals), samples)) # Final payoff for player 0 in Best Response Dynamics
SBRD_S7_time = np.zeros((len(lambda_vals), len(A_vals), samples))   # Runtime in Best Response Dynamics
max_A = A_vals[-1]  # Maximum number of actions (fixed at 100)
np.random.seed(2025) #We set the random seed for reproducibility 

T_0 = time()  # Start the global timer

for count_lam, lam in enumerate(lambda_vals):  # Loop over correlation values
    for i in range(samples):  # Loop over sample repetitions
        # Generate a shared reward matrix for both algorithms
        G = generate_reward_matrix(3, max_A, lam)

        for count_A, A in enumerate(A_vals):  # Loop over action counts (fixed here)
            t_0 = time()  # Start timer for PGD
            PGD_it, PGD_avg_val, PGD_val = gradient_ascent_dynamics(G, A)
            t = time() - t_0  # Elapsed time

            PGD_S7_isNash[count_lam, count_A, i] = (PGD_it < 50_000 - 1)  # Broke out before the iteration cap (epsilon-NE reached or time limit hit)?
            PGD_S7_numIte[count_lam, count_A, i] = PGD_it
            PGD_S7_valFin[count_lam, count_A, i] = PGD_val
            PGD_S7_valAvg[count_lam, count_A, i] = PGD_avg_val
            PGD_S7_time[count_lam, count_A, i] = t

            # === Best Response Dynamics (SBRD) ===
            t_0 = time()  # Start timer for SBRD
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            t = time() - t_0  # Elapsed time

            # Store SBRD results
            SBRD_S7_isNash[count_lam, count_A, i] = (SBRD_len == 1)  # Converged to pure NE?
            SBRD_S7_numIte[count_lam, count_A, i] = SBRD_it
            SBRD_S7_val[count_lam, count_A, i] = SBRD_val_curr
            SBRD_S7_time[count_lam, count_A, i] = t

# === Time Summary ===
Tot_time = time() - T_0
Tot_time_SBRD = np.sum(SBRD_S7_time)
Tot_time_PGD = np.sum(PGD_S7_time)
Tot_time_gener = Tot_time - Tot_time_SBRD - Tot_time_PGD

# Print runtime breakdown
print(f"Total time to run: {round(Tot_time, 1)} s")
print(f"Percentage of the time spent on random generation of matrix: {round((Tot_time_gener / Tot_time) * 100, 1)}%")
print(f"Percentage of the time spent on SBRD algorithm: {round((Tot_time_SBRD / Tot_time) * 100, 1)}%")
print(f"Percentage of the time spent on PGD algorithm: {round((Tot_time_PGD / Tot_time) * 100, 1)}%")
Total time to run: 35111.9 s
Percentage of the time spent on random generation of matrix: 0.7%
Percentage of the time spent on SBRD algorithm: 0.0%
Percentage of the time spent on PGD algorithm: 99.3%
compare_algorithms(PGD_varName="S7_time", SBRD_varName="S7_time", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Seconds", axis=1, variance=True, isPositive=True, log=True, legend_loc="upper right")

compare_algorithms(PGD_varName="S7_valFin", SBRD_varName="S7_val", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Value", axis=1, variance=True, isPositive=True, only_if_conv=True, legend_loc="upper left")

compare_algorithms(PGD_varName="S7_numIte", SBRD_varName="S7_numIte", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Steps", axis=1, variance=True, isPositive=True, log=True, legend_loc="upper right")

compare_algorithms(PGD_varName="S7_valAvg", SBRD_varName="S7_val", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Value", axis=1, variance=True, isPositive=True, legend_loc="upper right")