Utility functions (please skip to the next section)

import numpy as np
import math
import matplotlib.pyplot as plt
from time import time
import scipy.stats as stats
from scipy.stats import beta
from functools import reduce
from operator import matmul 
def softmax(r):
    # Map a vector of logits to a probability distribution (numerically stable:
    # subtracting the max before exponentiating prevents overflow).
    exp_r = np.exp(r - np.max(r))
    return exp_r / np.sum(exp_r)
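
# Quick illustration (values are arbitrary): subtracting the max leaves the
# resulting distribution unchanged, so very large logits do not overflow.
print(softmax(np.array([1.0, 2.0, 3.0])))
print(softmax(np.array([1001.0, 1002.0, 1003.0])))  # same distribution as above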

def gradient_ascent_dynamics(G, n, eta=1.0, max_iter=50_000, T_max=10):
    # Softmax (policy) gradient ascent for a 3-player game with n actions.
    # Inputs: the payoff tensor G of shape (3, n, n, n), the number of actions n,
    # a learning rate eta, a maximum iteration count, and a wall-clock budget
    # T_max in seconds. Returns the number of iterations, the average payoff
    # along the trajectory, and the final payoff for player 0.
    t_0 = time()
    # Initialise each player's "logit" vector (which will be mapped to a mixed strategy via softmax).
    r0 = np.zeros(n)
    r1 = np.zeros(n)
    r2 = np.zeros(n)
    
    total_payoff = 0.0
    
    for it in range(max_iter):
        # Convert logits to mixed strategies.
        p0 = softmax(r0)
        p1 = softmax(r1)
        p2 = softmax(r2)

        
        # Compute the expected payoff for each pure action for each player.
        # For player 0: Q0[i] = sum_{j,k} p1[j] p2[k] G[0][i, j, k]
        Q0 = np.array([np.dot(p1, np.dot(G[0][i, :n, :n], p2)) for i in range(n)])
        # For player 1: Q1[i] = sum_{j,k} p0[j] p2[k] G[1][j, i, k]
        Q1 = np.array([np.dot(p0, np.dot(G[1][:n, i, :n], p2)) for i in range(n)])
        # For player 2: Q2[i] = sum_{j,k} p0[j] p1[k] G[2][j, k, i]
        Q2 = np.array([np.dot(p0, np.dot(G[2][:n, :n, i], p1)) for i in range(n)])
        
        # Compute the overall expected payoff for each player.
        f0 = np.dot(p0, Q0)
        f1 = np.dot(p1, Q1)
        f2 = np.dot(p2, Q2)
        
        # Accumulate payoff for averaging.
        total_payoff += f0

        # Every 100 iterations, check if the current strategy profile is an epsilon-NE.
        if it % 100 == 0:
            best0 = np.max(Q0)
            best1 = np.max(Q1)
            best2 = np.max(Q2)
            if ((f0 >= 0.99 * best0) and (f1 >= 0.99 * best1) and (f2 >= 0.99 * best2)) or (time()-t_0 > T_max):
                break


        # "Advantage" of each pure action: expected payoff minus the current value.
        grad_r0 = Q0 - f0
        grad_r1 = Q1 - f1
        grad_r2 = Q2 - f2
        
        # Update the logits with a gradient ascent step. Since f is identical across
        # a player's actions, it cancels inside the softmax, so each strategy stays
        # proportional to exp(eta * cumulative Q) (an exponential-weights update).
        r0 += eta * grad_r0
        r1 += eta * grad_r1
        r2 += eta * grad_r2
    
    avg_payoff = total_payoff / max(it, 1)  # guard against a break on the very first iteration
    return it, avg_payoff, f0
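
# Illustrative smoke test (parameters are arbitrary): run the dynamics on an
# uncorrelated random 3-player game with 5 actions per player. The run stops at
# the first epsilon-NE checkpoint, or after at most T_max seconds.
G_demo = np.random.randn(3, 5, 5, 5)
it_demo, avg_demo, fin_demo = gradient_ascent_dynamics(G_demo, 5)
print(it_demo, avg_demo, fin_demo)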


# Hyperparameters for gradient ascent. These numbers have been selected after some trial and error, and provided the best results for this algorithm.
step_size = 1.0
max_iterations = 50_000

These utility functions are only used for plotting

def clopper_pearson_interval(k, n, alpha=0.005):
    # Exact (Clopper-Pearson) two-sided confidence interval for a binomial
    # proportion, given k successes out of n trials.
    lower = beta.ppf(alpha / 2, k, n - k + 1) if k > 0 else 0.0
    upper = beta.ppf(1 - alpha / 2, k + 1, n - k) if k < n else 1.0
    return lower, upper
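
# Worked example (illustrative): the default alpha = 0.005 gives a 99.5% CI,
# here for 9 successes out of 10 trials.
lo_ci, hi_ci = clopper_pearson_interval(9, 10)
print(f"99.5% CI for 9/10: [{lo_ci:.3f}, {hi_ci:.3f}]")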

def plot_single_algorithm(alg_name="SBRD", varName="time", ylabel=None, axis=0, mean_name=None, variance=True, only_if_conv=False, log=False, n_cols=1, number_of_plots=-1, binomial=False, isPositive=False, width=6, legend_loc = "upper left", title=None):
    """
    Plot runtimes of a single algorithm (PGD or SBRD) in subplots, comparing across one parameter.

    Parameters:
    - alg_name: 'PGD' or 'SBRD'
    - varName: variable name suffix for accessing data (e.g., "Seconds" accesses PGD_Seconds or SBRD_Seconds)
    - axis: 0 or 1
        - 0: compare across N for each Lambda (row-wise)
        - 1: compare across Lambda for each N (column-wise)
    - variance: whether to include standard deviation shading
    - only_if_conv: whether to only include data when convergence was achieved (using *_isNash)
    - log: whether to plot on a log scale
    - n_cols: number of columns of subplots to create
    - number_of_plots: number of subplots to show (-1 for all)
    - lambda_vals: optional list/array of Lambda values
    - A_vals: optional list/array of N values
    - binomial: whether the data is binomial (0-1 values)
    """

    if ylabel is None:
        ylabel = varName

    # If only_if_conv is True, keep only the samples for which the algorithm converged.
    data = globals()[f"{alg_name}_{varName}"]
    if only_if_conv:
        isNash = globals()[f"{alg_name}_isNash"]
        masked_data = np.where(isNash, data, np.nan)
    else:
        masked_data = data

    """
    Creates the matrix of means and the matrices needed for the Confidence Intervals.
    If the data is Binary, the CI is done using the Clopper Pearson interval. If the data is real, we use the Standard Error
    """

    mean_data = np.nanmean(masked_data, axis=-1)
    if variance:
        n_data = np.sum(~np.isnan(masked_data), axis=-1)
        if binomial:
            sum_data = np.nansum(masked_data, axis=-1)
            lower_bounds = np.zeros_like(mean_data)
            upper_bounds = np.ones_like(mean_data)
            for i in range(mean_data.shape[0]):
                for j in range(mean_data.shape[1]):
                    if n_data[i, j] > 0:
                        lower, upper = clopper_pearson_interval(int(sum_data[i, j]), int(n_data[i, j]))
                        lower_bounds[i, j] = lower
                        upper_bounds[i, j] = upper
        else:
            SE_data = np.sqrt(np.divide(np.nanvar(masked_data, axis=-1), n_data))

    L, N = mean_data.shape

    """ 
    Depending on wether we want the plots to be done along the 0 or 1 axis, we get different "get_mean", "get_lower" and "get_upper"
    functions, that take as input i and return the right vector (respectively the mean, lower and upper CI points) with that index.
    """
    if axis == 0:
        num_plots = L
        x_vals = A_vals 
        y_vals = lambda_vals 
        get_mean = lambda i: mean_data[i, :]
        if variance:
            if binomial:
                get_lower = lambda i: lower_bounds[i, :]
                get_upper = lambda i: upper_bounds[i, :]
            else:
                get_var = lambda i: SE_data[i, :]
        label = "Correlation λ"
        xlabel = "Nr. actions"
    elif axis == 1:
        num_plots = N
        x_vals = lambda_vals
        y_vals = A_vals 
        get_mean = lambda i: mean_data[:, i]
        if variance:
            if binomial:
                get_lower = lambda i: lower_bounds[:, i]
                get_upper = lambda i: upper_bounds[:, i]
            else:
                get_var = lambda i: SE_data[:, i]
        label = "Nr. actions"
        xlabel = "Correlation λ"
    else:
        raise ValueError("Axis must be 0 (over Lambda) or 1 (over N)")
    


    """
    We now create the structure for the plots
    """
    if number_of_plots == -1 or number_of_plots >= num_plots:
        indices = list(range(num_plots))
    else:
        indices = np.linspace(0, num_plots - 1, number_of_plots, dtype=int)

    n_rows = math.ceil(len(indices) / n_cols)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(width * n_cols, 3 * n_rows), sharex=True)
    axes = np.array(axes).flatten()

    color = (0.1, 0.5, 0.8) if alg_name == "PGD" else (0.9, 0.4, 0.2)

    """
    We finally plot the various graphs
    """

    for i, idx in enumerate(indices):
        ax = axes[i]
        ax.set_xlabel(xlabel)
        if log:
            ax.set_yscale('log')
            ydata = get_mean(idx)
            ymin = np.nanmin(ydata)
            ymax = np.nanmax(ydata)

            # Round to nearest powers of 10
            lower = 10 ** np.floor(np.log10(ymin))
            upper = 10 ** np.ceil(np.log10(ymax))
            ax.set_ylim(lower, upper)
        ax.plot(x_vals, get_mean(idx), color=color, label=mean_name if mean_name is not None else varName)
        if variance:
            if binomial:
                ax.fill_between(x_vals, get_lower(idx), get_upper(idx), color=color, alpha=0.2, label='99.5% CI')
            else:
                lower = get_mean(idx) - 2*get_var(idx)
                upper = get_mean(idx) + 2*get_var(idx)
                if isPositive:
                    lower = np.clip(lower, 0, None)
                    upper = np.clip(upper, 0, None)
                ax.fill_between(x_vals, lower, upper, color=color, alpha=0.2, label='±2 SE')
        ax.set_ylabel(ylabel)
        if title is None:
            ax.set_title(f"{label} = {round(y_vals[idx], 2)}, samples = {samples}")
        else:
            ax.set_title(title)
        ax.legend(loc=legend_loc)
        ax.grid(True)

    for j in range(len(indices), len(axes)):
        fig.delaxes(axes[j])

    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.savefig(f"{alg_name}_{varName}_samples={samples} axis={axis}.pdf")
    plt.show()



def compare_algorithms(PGD_varName="time", SBRD_varName="time", yLabel=None, axis=0, mean_1_name=None, mean_2_name=None, title=None, variance=True, only_if_conv=False, log=False, n_cols=1, number_of_plots=-1, binomial=False, width = 6, isPositive=False, legend_loc="upper left", expnr = "3"):
    """
    Plot runtimes of PGD and SBRD algorithms in subplots, comparing them across one parameter.

    Parameters:
    - PGD_varName, SBRD_varName: variable name suffix for accessing data (e.g., "time" accesses PGD_time and SBRD_time)
    - axis: 0 or 1
        - 0: compare across N for each Lambda (row-wise)
        - 1: compare across Lambda for each N (column-wise)
    - variance: whether to include standard deviation shading
    - only_if_conv: whether to only include data when convergence was achieved (using PGD_isNash, SBRD_isNash)
    - log: whether to plot on a log scale
    - n_cols: number of columns of subplots to create
    - lambda_vals: global list/array of Lambda values
    - A_vals: global list/array of N values
    - number_of_plots: number of subplots to display (equally spaced). If -1, show all.
    - binomial: whether the data is binomial (0-1)
    - isPositive: whether to clip the lower bound at 0
    """
    if yLabel is None:
        yLabel = PGD_varName

    def process_data(alg_name, varName):
        data = globals()[f"{alg_name}_{varName}"]
        if only_if_conv:
            isNash = globals()[f"{alg_name}_S{expnr}_isNash"]
            data = np.where(isNash, data, np.nan)
        mean_data = np.nanmean(data, axis=-1)
        n_data = np.sum(~np.isnan(data), axis=-1)

        if not variance:
            return mean_data, None, None

        if binomial:
            sum_data = np.nansum(data, axis=-1)
            lower_bounds = np.zeros_like(mean_data)
            upper_bounds = np.ones_like(mean_data)
            for i in range(mean_data.shape[0]):
                for j in range(mean_data.shape[1]):
                    if n_data[i, j] > 0:
                        lower, upper = clopper_pearson_interval(int(sum_data[i, j]), int(n_data[i, j]))
                        lower_bounds[i, j] = lower
                        upper_bounds[i, j] = upper
            return mean_data, lower_bounds, upper_bounds
        else:
            SE = np.sqrt(np.nanvar(data, axis=-1) / n_data)
            lower_bounds = mean_data - 2 * SE
            upper_bounds = mean_data + 2 * SE
            return mean_data, lower_bounds, upper_bounds

    PGD_mean, PGD_lower, PGD_upper = process_data("PGD", PGD_varName)
    SBRD_mean, SBRD_lower, SBRD_upper = process_data("SBRD", SBRD_varName)

    L, N = PGD_mean.shape
    if axis == 0:
        num_plots = L
        x_vals = A_vals
        y_vals = lambda_vals
        get_slice = lambda data, i: data[i, :]
        label = "Correlation λ"
        xlabel = "Nr. actions"
    elif axis == 1:
        num_plots = N
        x_vals = lambda_vals
        y_vals = A_vals
        get_slice = lambda data, i: data[:, i]
        label = "Nr. actions"
        xlabel = "Correlation λ"
    else:
        raise ValueError("Axis must be 0 (over Lambda) or 1 (over N)")

    indices = range(num_plots) if number_of_plots == -1 or number_of_plots >= num_plots else np.linspace(0, num_plots - 1, number_of_plots, dtype=int)
    n_rows = math.ceil(len(indices) / n_cols)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(width * n_cols, 3 * n_rows), sharex=True)
    axes = np.array(axes).flatten()

    colors = {"PGD": (0.1, 0.5, 0.8), "SBRD": (0.9, 0.4, 0.2)}

    for i, idx in enumerate(indices):
        ax = axes[i]
        ax.set_xlabel(xlabel)
        if log:
            ax.set_yscale('log')

        for name, mean, lower, upper, color in [
            ("PGD", PGD_mean, PGD_lower, PGD_upper, colors["PGD"]),
            ("SBRD", SBRD_mean, SBRD_lower, SBRD_upper, colors["SBRD"])
        ]:
            y = get_slice(mean, idx)
            curr_label = mean_1_name if name == "PGD" else mean_2_name
            ax.plot(x_vals, y, label=curr_label, color=color)
            if variance and lower is not None and upper is not None:
                y_lower = get_slice(lower, idx)
                y_upper = get_slice(upper, idx)
                if isPositive:
                    y_lower = np.clip(y_lower, 0, None)
                    y_upper = np.clip(y_upper, 0, None)
                #label_ci = "99.5% CI" if binomial else "±2SE"
                ax.fill_between(x_vals, y_lower, y_upper, color=color, alpha=0.2)#, label=f"{label_ci}")

        ax.set_ylabel(yLabel)
        if title is None:
            ax.set_title(f"{label} = {round(y_vals[idx], 2)}, samples = {samples}")
        else:
            ax.set_title(title)
        ax.legend(loc=legend_loc)
        ax.grid(True)

    for j in range(len(indices), len(axes)):
        fig.delaxes(axes[j])

    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.savefig(f"compare_{PGD_varName}_axis={axis}.pdf")
    plt.show()

Dynamics definitions

def generate_reward_matrix(P, A, lam):
    """
    Generate the payoff tensor for a P-player game with A actions per player.

    Parameters:
    - P (int): Number of players.
    - A (int): Number of actions per player.
    - lam (float): Correlation between players' payoffs; used here with lam in [0, 1),
      so that the correlation matrix is positive definite.

    Returns:
    - np.ndarray: Payoff tensor G of shape (P, A, ..., A). Each entry G[i, a, b, ...]
      is the payoff for player i when players 0, 1, ... play actions a, b, ... respectively.
    """
    # Correlation matrix across players: 1 on the diagonal, lam off the diagonal
    diagonal = np.diag(np.ones(P))
    ones_matrix = np.ones((P, P))
    corr_matrix = (ones_matrix-np.diag(np.diag(ones_matrix)))*lam+diagonal
    
    # Cholesky decomposition for generating correlated normal variables
    L = np.linalg.cholesky(corr_matrix)
    
    # Generate correlated payoffs for all joint action profiles
    raw_samples = np.random.randn(P, A**P)
    correlated = L @ raw_samples
    
    shape = tuple(P if i==0 else A for i in range(P+1))

    # Reshape to (P, A, ..., A): one payoff tensor per player
    return correlated.reshape(shape)
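
# Sanity check (illustrative): for two players and lam = 0.5, the empirical
# correlation between the two players' payoff tables should be close to 0.5.
G_chk = generate_reward_matrix(2, 100, 0.5)
print(np.corrcoef(G_chk[0].ravel(), G_chk[1].ravel())[0, 1])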

def best_response_dynamics(G, A=None):
    """
    Simulate simultaneous Best Response Dynamics (SBRD) in a P-player game.

    Parameters:
    - G (np.ndarray): Payoff tensor of shape (P, A, ..., A). G[i, a, b, ...] gives the
      payoff to player i when players 0, 1, ... choose actions a, b, ... respectively.
    - A (int, optional): Restrict every player to the first A actions of G.
      Defaults to the full action space.

    Returns:
    - cycle_length (int): Length of the cycle detected by SBRD (1 means a pure NE).
    - total_steps (int): Total number of steps before entering the cycle.
    - final_payoff (float): Payoff of player 0 at the NE, or its average over the detected cycle.
    """
    P = G.shape[0]
    if A is None:
        A = G.shape[1]

    x = tuple(0 for _ in range(P))        # Initial strategy profile
    index = 0                             # Step counter
    history = dict()                      # Maps strategy profiles to step indices

    while True:
        index += 1

        # Simultaneous best responses given the others' current actions. The index
        # tuple selects player i's payoff table (entry 0), fixes every other player's
        # axis at their current action, and leaves player i's own axis as a full
        # slice to take the argmax over.
        x = tuple( np.argmax( G[ tuple(i if j == 0 else x[j-1] if j-1 != i else slice(0, A) for j in range(P+1)) ] ) for i in range(P))


        if x in history:
            cycle_length = index - history[x]

            if cycle_length == 1:
                # Cycle of length 1: the profile is a pure Nash equilibrium
                return cycle_length, index, G[tuple(0 if i == 0 else x[i-1] for i in range(P+1))]

            # General cycle detected: compute average payoff for player 0
            cycle = [x]
            avg = G[tuple(0 if i == 0 else x[i-1] for i in range(P+1))]

            for _ in range(cycle_length - 1):
                x = tuple( np.argmax( G[ tuple(i if j == 0 else x[j-1] if j-1 != i else slice(0, A) for j in range(P+1)) ] ) for i in range(P))
                cycle.append(x)
                avg += G[tuple(0 if i == 0 else x[i-1] for i in range(P+1))]

            return cycle_length, index, avg / cycle_length

        # Log the current profile and continue
        history[x] = index
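
# Usage sketch (illustrative parameters): a single SBRD run on a random
# 2-player game; cycle_length == 1 means a pure Nash equilibrium was reached.
G_demo2 = generate_reward_matrix(2, 10, 0.9)
len_demo, steps_demo, val_demo = best_response_dynamics(G_demo2)
print(len_demo, steps_demo, val_demo)
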
def softmax(r):
    # Map a vector of logits to a probability distribution (numerically stable).
    exp_r = np.exp(r - np.max(r))
    return exp_r / np.sum(exp_r)
def new_SPGD(G, eta=1.0, max_iter=50_000, T_max=10):
    # Softmax (policy) gradient ascent for a general P-player game with A actions.
    # Inputs: the payoff tensor G of shape (P, A, ..., A), a learning rate eta,
    # a maximum iteration count, and a wall-clock budget T_max in seconds.
    # Returns the number of iterations, the average payoff along the trajectory,
    # and the final payoff for player 0.
    t_0 = time()
    P, A = G.shape[0:2]

    # Initialise each player's "logit" vector (mapped to a mixed strategy via softmax).
    R = tuple(np.zeros(A) for _ in range(P))

    total_payoff = 0.0
    for it in range(max_iter):
        # Convert logits to mixed strategies.
        Softmax = tuple(softmax(R[i]) for i in range(P))
        
        # Compute the expected payoff of each pure action of each player:
        # Q[p][i] = E[payoff to p | p plays i, others play their mixed strategies],
        # e.g. for P = 3 and p = 0: Q[0][i] = sum_{j,k} Softmax[1][j] Softmax[2][k] G[0][i, j, k].
        # We contract the outer product of the other players' strategies against
        # G[p], leaving player p's own action axis free.
        Q = tuple(
            np.tensordot(
                np.moveaxis(G[p], p, 0),              # player p's action axis first
                reduce(np.multiply.outer,             # joint distribution of the other players
                       (Softmax[j] for j in range(P) if j != p)),
                axes=P - 1)
            for p in range(P))

        # Compute the overall expected payoff for each player.
        F = tuple(np.dot(Softmax[i], Q[i]) for i in range(P))
        
        # Accumulate payoff for averaging.
        total_payoff += F[0]

        # Every 100 iterations, check if the current strategy profile is an epsilon-NE.
        if it % 100 == 0:
            best = tuple(np.max(Q[i]) for i in range(P))
            if all(F[i] >= 0.99 * best[i] for i in range(P)) or (time()-t_0 > T_max):
                break


        grad = tuple(Q[i]-F[i] for i in range(P))
        
        # Update the logits using a simple gradient ascent step.
        R = tuple(R[i]+eta*grad[i] for i in range(P))
    
    avg_payoff = total_payoff / max(it, 1)  # guard against a break on the very first iteration
    return it, avg_payoff, F[0]
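
# Equivalence check (illustrative, assuming P = 3 and A = 4): the tensor
# contraction used in new_SPGD matches an explicit einsum for player 0.
G_e = np.random.randn(3, 4, 4, 4)
s_e = tuple(softmax(np.random.randn(4)) for _ in range(3))
W_e = np.multiply.outer(s_e[1], s_e[2])          # joint distribution of players 1 and 2
print(np.allclose(np.tensordot(G_e[0], W_e, axes=2),
                  np.einsum('ijk,j,k->i', G_e[0], s_e[1], s_e[2])))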

Main Results

Section 1. Finding: 2-player random potential games converge fast to a two-cycle

Setting of the experiment:
- 10000 independent simulations
- Each simulation takes lambda in a range of values between 0 and 1
- The number of actions is set to 50
- The number of players is set to two

Statistics analysed:
- SBRD_S1_twoCycles[l, 0, t]: True if the t-th test with the l-th value of lambda converges to a 2-cycle
- SBRD_S1_nrSteps[l, 0, t]: number of steps to convergence (or to a cycle) in the t-th test with the l-th value of lambda

# Number of simulations per (λ, N) setting
samples = 10_000
lambda_vals = [0.09999999999*0.5*i for i in range(21)]  # ~0.05 grid on [0, 1), kept strictly below 1 so the correlation matrix stays positive definite
A_vals = [50]

SBRD_S1_twoCycles = np.full((len(lambda_vals), 1, samples), False, dtype=bool)
SBRD_S1_nrSteps = np.zeros((len(lambda_vals), len(A_vals), samples))

We now run the experiment, setting the numpy random seed to 2025. We print the time it takes to run the whole experiment in seconds.

np.random.seed(2025) #We set the random seed for reproducibility 
T_0 = time()  # Start timer for entire experiment
max_A = max(A_vals)
for count_lam, lam in enumerate(lambda_vals):  # Loop over each lambda value
    for i in range(samples):  # Repeat experiment for statistical robustness
        G = generate_reward_matrix(2, max_A, lam)  # Generate a payoff matrix with max_A actions and correlation lambda
    
        for count_A, A in enumerate(A_vals):  # Now restrict to smaller N if needed
            # Run Best Response Dynamics and time it
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            SBRD_S1_twoCycles[count_lam, count_A, i] = (SBRD_len == 2)
            SBRD_S1_nrSteps[count_lam, count_A, i] = SBRD_it

# Final time report
Total_time = time() - T_0
print(Total_time)
23.52963089942932

We now begin to plot our results. The first plot shows the probability that SBRD converges to a 2-cycle, with a 99.5% Clopper-Pearson CI.

plot_single_algorithm(alg_name="SBRD", varName="S1_twoCycles", title="", mean_name="Prob", ylabel="Prob. conver. to two-cycle", axis=1, binomial=True, legend_loc="upper left")

The second plot represents the average number of steps before convergence. It is plotted with a 2SE interval around the mean.

plot_single_algorithm(alg_name="SBRD", varName="S1_nrSteps", mean_name="nrSteps", title="", ylabel="Steps", axis=1, isPositive=True, log=False, legend_loc="upper right")

With the following script, we investigate the thresholds of lambda at which the probability that SBRD converges to a 2-cycle surpasses 50% and 90% respectively. That is, we find that if lambda is at least 0.55, then the probability that SBRD converges to a 2-cycle is at least 50%; if lambda is at least 0.9, the probability is at least 90%.

# np.argmax on a boolean array returns the index of the first True entry.
print(round(lambda_vals[int(np.argmax(np.mean(SBRD_S1_twoCycles, axis=-1)>0.5))],2))
print(round(lambda_vals[int(np.argmax(np.mean(SBRD_S1_twoCycles, axis=-1)>0.9))],2))
0.55
0.9

Section 2. Finding: 3-player random potential games converge fast to a NE

We now consider the setting with three players. Setting of the experiment:
- 10000 independent simulations
- Each simulation takes lambda in a range of values between 0 and 1
- The number of actions is set to 50
- The number of players is set to three

Statistics analysed:
- SBRD_S2_isNash[l, 0, t]: True if the t-th test with the l-th value of lambda converges to a NE
- SBRD_S2_nrSteps[l, 0, t]: number of steps to convergence (or to a cycle) in the t-th test with the l-th value of lambda

# Number of simulations per (λ, N) setting
samples = 10_000
lambda_vals = [0.09999999999*0.5*i for i in range(21)]
A_vals = [50]

SBRD_S2_isNash = np.full((len(lambda_vals), 1, samples), False, dtype=bool)
SBRD_S2_nrSteps = np.zeros((len(lambda_vals), len(A_vals), samples))

We now run the experiment, setting the numpy random seed to 2025. We print the time it takes to run the whole experiment in seconds.

np.random.seed(2025) #We set the random seed for reproducibility 
T_0 = time()  # Start timer for entire experiment
max_A = max(A_vals)
for count_lam, lam in enumerate(lambda_vals):  # Loop over each lambda value
    for i in range(samples):  # Repeat experiment for statistical robustness
        G = generate_reward_matrix(3, max_A, lam)  # Generate a payoff matrix with max_A actions and correlation lambda
    
        for count_A, A in enumerate(A_vals):  # Now restrict to smaller N if needed
            # Run Best Response Dynamics and time it
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            SBRD_S2_isNash[count_lam, count_A, i] = (SBRD_len == 1)
            SBRD_S2_nrSteps[count_lam, count_A, i] = SBRD_it

# Final time report
Total_time = time() - T_0
print(Total_time)
1237.7669422626495

We are now ready to plot our results. We first plot the probability that SBRD converges to a NE, with a 99.5% Clopper-Pearson CI.

plot_single_algorithm(alg_name="SBRD", varName="S2_isNash", title="", mean_name="isNash", ylabel="Prob. conver. to NE", axis=1, binomial=True, legend_loc="upper left")

And then the expected number of steps before convergence (or reaching a cycle). We add a 2SE interval to the plot.

plot_single_algorithm(alg_name="SBRD", varName="S2_nrSteps", mean_name="nrSteps", title="", ylabel="Steps", axis=1, isPositive=True, log=False, legend_loc="upper right")

Section 3. Finding: comparison of SBRD and SPGD in 3-player random potential games

We now compare SBRD with SPGD for 3-player games. Setting of the experiment:
- 1000 independent simulations
- Each simulation takes lambda in a range of values between 0.85 and 1
- The number of actions is set to 50
- The number of players is set to three

Statistics analysed:
- For SPGD:
  - PGD_S3_isNash[l, 0, t]: True if, in the t-th test with the l-th value of lambda, SPGD converges to a NE
  - PGD_S3_numIte[l, 0, t]: number of steps to convergence (or cycle) of SPGD in the t-th test with the l-th value of lambda
  - PGD_S3_valFin[l, 0, t]: final payoff reached by SPGD in the t-th test with the l-th value of lambda
  - PGD_S3_valAvg[l, 0, t]: average payoff along the trajectory of SPGD in the t-th test with the l-th value of lambda
  - PGD_S3_time[l, 0, t]: time to run SPGD in the t-th test with the l-th value of lambda
  - Remark: SPGD is automatically stopped at the first checkpoint after 10 seconds of running time.
- For SBRD:
  - SBRD_S3_isNash[l, 0, t]: True if, in the t-th test with the l-th value of lambda, SBRD converges to a NE
  - SBRD_S3_numIte[l, 0, t]: number of steps to convergence (or cycle) of SBRD in the t-th test with the l-th value of lambda
  - SBRD_S3_val[l, 0, t]: NE payoff reached by SBRD in the t-th test with the l-th value of lambda if SBRD converges to a NE, or the average payoff along the cycle otherwise
  - SBRD_S3_time[l, 0, t]: time to run SBRD in the t-th test with the l-th value of lambda

# Experimental setup
samples = 1000
lambda_vals = [0.85+0.09999999999*0.25*i for i in range(7)] # 7 lambda values from 0.85 to just below 1
A_vals = [50]

# Outputs to Track
## Policy Gradient Dynamics
PGD_S3_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool) # {0,1} veridicity of Nash in Policy Gradient Dynamics
PGD_S3_numIte = np.zeros((len(lambda_vals), len(A_vals), samples)) # Number of iterations in Policy Gradient Dynamics
PGD_S3_valFin = np.zeros((len(lambda_vals), len(A_vals), samples)) # Final payoff for player 0 in Policy Gradient Dynamics
PGD_S3_valAvg = np.zeros((len(lambda_vals), len(A_vals), samples)) # Average payoff for player 0 in Policy Gradient Dynamics
PGD_S3_time = np.zeros((len(lambda_vals), len(A_vals), samples))   # Runtime in Policy Gradient Dynamics

## Best Response Dynamics
SBRD_S3_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool) # {0,1} veridicity of Nash in Best Response Dynamics
SBRD_S3_numIte = np.zeros((len(lambda_vals), len(A_vals), samples)) # Number of iterations in Best Response Dynamics
SBRD_S3_val = np.zeros((len(lambda_vals), len(A_vals), samples)) # Final payoff for player 0 in Best Response Dynamics
SBRD_S3_time = np.zeros((len(lambda_vals), len(A_vals), samples))   # Runtime in Best Response Dynamics
max_A = A_vals[-1]  # Maximum number of actions (fixed at 50)
np.random.seed(2025) #We set the random seed for reproducibility 

T_0 = time()  # Start the global timer

for count_lam, lam in enumerate(lambda_vals):  # Loop over correlation values
    for i in range(samples):  # Loop over sample repetitions
        # Generate a shared reward matrix for both algorithms
        G = generate_reward_matrix(3, max_A, lam)

        for count_A, A in enumerate(A_vals):  # Loop over action counts (fixed here)
            t_0 = time()  # Start timer for PGD
            PGD_it, PGD_avg_val, PGD_val = gradient_ascent_dynamics(G, A)
            t = time() - t_0  # Elapsed time

            PGD_S3_isNash[count_lam, count_A, i] = (PGD_it < 50_000 - 1)  # Converged?
            PGD_S3_numIte[count_lam, count_A, i] = PGD_it
            PGD_S3_valFin[count_lam, count_A, i] = PGD_val
            PGD_S3_valAvg[count_lam, count_A, i] = PGD_avg_val
            PGD_S3_time[count_lam, count_A, i] = t

            # === Best Response Dynamics (SBRD) ===
            t_0 = time()  # Start timer for SBRD
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            t = time() - t_0  # Elapsed time

            # Store SBRD results
            SBRD_S3_isNash[count_lam, count_A, i] = (SBRD_len == 1)  # Converged to pure NE?
            SBRD_S3_numIte[count_lam, count_A, i] = SBRD_it
            SBRD_S3_val[count_lam, count_A, i] = SBRD_val_curr
            SBRD_S3_time[count_lam, count_A, i] = t

# === Time Summary ===
Tot_time = time() - T_0
Tot_time_SBRD = np.sum(SBRD_S3_time)
Tot_time_PGD = np.sum(PGD_S3_time)
Tot_time_gener = Tot_time - Tot_time_SBRD - Tot_time_PGD

# Print runtime breakdown
print(f"Total time to run: {round(Tot_time, 1)} s")
print(f"Percentage of the time spent on random generation of matrix: {round((Tot_time_gener / Tot_time) * 100, 1)}%")
print(f"Percentage of the time spent on SBRD algorithm: {round((Tot_time_SBRD / Tot_time) * 100, 1)}%")
print(f"Percentage of the time spent on PGD algorithm: {round((Tot_time_PGD / Tot_time) * 100, 1)}%")
Total time to run: 5859.2 s
Percentage of the time spent on random generation of matrix: 0.6%
Percentage of the time spent on SBRD algorithm: 0.0%
Percentage of the time spent on PGD algorithm: 99.4%

Additional remark: we print the percentage of time each part of the experiment takes. We consistently find that SPGD accounts for over 99% of the total time, while SBRD accounts for less than 0.1% of the running time.

We are now ready to plot our findings. We first compare the average running time of both algorithms. Note that SPGD is stopped at the first checkpoint after 10 seconds. Further notice that the scale used is logarithmic!

compare_algorithms(PGD_varName="S3_time", SBRD_varName="S3_time", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Seconds", axis=1, variance=True, isPositive=True, log=True, expnr="3", legend_loc="upper right")

We now compare the final payoffs of the two algorithms. As the plotting function is called with the parameter `only_if_conv=True`, we only consider the cases in which both algorithms converge. Notice that this happens with probability at least 90% in these regimes of lambda for both algorithms, as analysed above.
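
As a minimal illustration of the masking behind `only_if_conv` (toy values, not experiment data): non-converged samples are replaced by NaN and then ignored by the nan-aware mean.

toy_vals = np.array([1.0, 2.0, 3.0, 4.0])
toy_conv = np.array([True, False, True, True])
print(np.nanmean(np.where(toy_conv, toy_vals, np.nan)))  # mean over converged samples only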

compare_algorithms(PGD_varName="S3_valFin", SBRD_varName="S3_val", title="", mean_1_name="SPGD±2SE", expnr="3", mean_2_name="SBRD±2SE", yLabel="Value", axis=1, variance=True, isPositive=True, only_if_conv=True, legend_loc="upper left")

We compare now the number of steps required to converge (to a NE or a cycle) for both algorithms. The scale is again logarithmic.

compare_algorithms(PGD_varName="S3_numIte", SBRD_varName="S3_numIte", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Steps", axis=1, variance=True, isPositive=True, log=True, legend_loc="upper right")

We finally compare, over all the experiments (convergent or not), the average payoff of SPGD along its learning curve with the final payoff of SBRD. As described in the main paper, this makes sense because SBRD needs far fewer steps to converge than SPGD.

compare_algorithms(PGD_varName="S3_valAvg", SBRD_varName="S3_val", title="", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Value", axis=1, variance=True, isPositive=True, legend_loc="upper right")

Appendix

Section A. Results of Section 1. also hold for more actions

We now show that the findings of Section 1 still hold for more actions. Setting of the experiment:
- 1000 independent simulations
- Each simulation takes lambda in a range of values between 0 and 1
- The number of actions is set to 500
- The number of players is set to two

Statistics analysed:
- SBRD_S4_twoCycles[l, 0, t]: True if the t-th test with the l-th value of lambda converges to a 2-cycle
- SBRD_S4_nrSteps[l, 0, t]: number of steps to convergence (or to a cycle) in the t-th test with the l-th value of lambda

# Number of simulations per (λ, N) setting
samples = 1_000
lambda_vals = [0.09999999999*0.5*i for i in range(21)]
A_vals = [500]

SBRD_S4_twoCycles = np.full((len(lambda_vals), 1, samples), False, dtype=bool)
SBRD_S4_nrSteps = np.zeros((len(lambda_vals), len(A_vals), samples))
np.random.seed(2025) #We set the random seed for reproducibility 
T_0 = time()  # Start timer for entire experiment
max_A = max(A_vals)
for count_lam, lam in enumerate(lambda_vals):  # Loop over each lambda value
    for i in range(samples):  # Repeat experiment for statistical robustness
        G = generate_reward_matrix(2, max_A, lam)  # Generate a payoff matrix with max_A actions and correlation lambda
    
        for count_A, A in enumerate(A_vals):  # Now restrict to smaller N if needed
            # Run Best Response Dynamics and time it
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            SBRD_S4_twoCycles[count_lam, count_A, i] = (SBRD_len == 2)
            SBRD_S4_nrSteps[count_lam, count_A, i] = SBRD_it

# Final time report
Total_time = time() - T_0
print(Total_time)
126.69464802742004

Exactly as before, we plot the probability that SBRD converges to a two-cycle (with a 99.5% Clopper-Pearson CI).

plot_single_algorithm(alg_name="SBRD", varName="S4_twoCycles", title="", mean_name="twoCycle", ylabel="Prob. conver. to two-cycle", axis=1, binomial=True, legend_loc="upper left")

And now the number of steps to convergence, with a 2SE range.

plot_single_algorithm(alg_name="SBRD", varName="S4_nrSteps", mean_name="nrSteps", title="", ylabel="Steps", axis=1, isPositive=True, log=False, legend_loc="upper right")

And, as before, the threshold values of lambda. Notice that now SBRD converges to a two-cycle with probability at least 90% for all values of lambda above 0.75.

print(round(lambda_vals[int(np.argmax(np.mean(SBRD_S4_twoCycles, axis=-1)>0.5))],2))
print(round(lambda_vals[int(np.argmax(np.mean(SBRD_S4_twoCycles, axis=-1)>0.9))],2))
0.5
0.75

Section B.1. Results of Section 2. also hold for more actions

We now show that the findings of Section 2 still hold for more actions. Setting of the experiment:
- 1000 independent simulations
- Each simulation takes lambda in a range of values between 0 and 1
- The number of actions is set to 100
- The number of players is set to three

Statistics analysed:
- SBRD_S5_isNash[l, 0, t]: True if the t-th test with the l-th value of lambda converges to a NE
- SBRD_S5_nrSteps[l, 0, t]: number of steps to convergence (or to a cycle) in the t-th test with the l-th value of lambda

# Number of simulations per (λ, N) setting
samples = 1000
lambda_vals = [0.09999999999*0.5*i for i in range(21)]
A_vals = [100]

SBRD_S5_isNash = np.full((len(lambda_vals), 1, samples), False, dtype=bool)
SBRD_S5_nrSteps = np.zeros((len(lambda_vals), len(A_vals), samples))
np.random.seed(2025) #We set the random seed for reproducibility 
T_0 = time()  # Start timer for entire experiment
max_A = max(A_vals)
for count_lam, lam in enumerate(lambda_vals):  # Loop over each lambda value
    for i in range(samples):  # Repeat experiment for statistical robustness
        G = generate_reward_matrix(3, max_A, lam)  # Generate a payoff matrix with max_A actions and correlation lambda
    
        for count_A, A in enumerate(A_vals):  # Now restrict to smaller N if needed
            # Run Best Response Dynamics and time it
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            SBRD_S5_isNash[count_lam, count_A, i] = (SBRD_len == 1)
            SBRD_S5_nrSteps[count_lam, count_A, i] = SBRD_it

# Final time report
Total_time = time() - T_0
print(Total_time)
853.8707270622253

As before, we plot the probability that SBRD converges to a NE.

plot_single_algorithm(alg_name="SBRD", varName="S5_isNash", title="", mean_name="isNash", ylabel="Prob. conver. to NE", axis=1, binomial=True, legend_loc="upper left")

And then the average number of steps to convergence

plot_single_algorithm(alg_name="SBRD", varName="S5_nrSteps", mean_name="nrSteps", title="", ylabel="Steps", axis=1, isPositive=True, log=False, legend_loc="upper right")

Section B.2. Results of Section 2. also hold for more players

We now test SBRD for 4 players. Setting of the experiment:
- 1000 independent simulations
- Each simulation takes lambda in a range of values between 0 and 1
- The number of actions is set to 50
- The number of players is set to four

Statistics analysed:
- SBRD_S6_isNash[l, 0, t]: True if the t-th test with the l-th value of lambda converges to a NE
- SBRD_S6_nrSteps[l, 0, t]: number of steps to convergence (or to a cycle) in the t-th test with the l-th value of lambda

# Number of simulations per (λ, N) setting
samples = 1_000
lambda_vals = [0.09999999999*0.5*i for i in range(21)]
A_vals = [50]

SBRD_S6_isNash = np.full((len(lambda_vals), 1, samples), False, dtype=bool)
SBRD_S6_nrSteps = np.zeros((len(lambda_vals), len(A_vals), samples))
np.random.seed(2025) #We set the random seed for reproducibility 
T_0 = time()  # Start timer for entire experiment
max_A = max(A_vals)
for count_lam, lam in enumerate(lambda_vals):  # Loop over each lambda value
    for i in range(samples):  # Repeat experiment for statistical robustness
        G = generate_reward_matrix(4, max_A, lam)  # Generate a payoff matrix with max_A actions and correlation lambda
    
        for count_A, A in enumerate(A_vals):  # Now restrict to smaller N if needed
            # Run Best Response Dynamics and time it
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            SBRD_S6_isNash[count_lam, count_A, i] = (SBRD_len == 1)
            SBRD_S6_nrSteps[count_lam, count_A, i] = SBRD_it

# Final time report
Total_time = time() - T_0
print(Total_time)
25260.329627752304

We see that the probability of convergence to a NE follows the same pattern as above. As before, we plot a 99.5% Clopper-Pearson confidence interval.

plot_single_algorithm(alg_name="SBRD", varName="S6_isNash", title="", mean_name="isNash", ylabel="Prob. conver. to NE", axis=1, binomial=True, legend_loc="upper left")

We also plot the average number of steps taken to convergence

plot_single_algorithm(alg_name="SBRD", varName="S6_nrSteps", mean_name="nrSteps", title="", ylabel="Steps", axis=1, isPositive=True, log=False, legend_loc="upper right")

Section C.1. Results of Section 3. also hold for more actions

Finally, we compare SBRD and SPGD over more actions to show that the behaviour is similar. Setting of the experiment:
- 1000 independent simulations
- Each simulation takes lambda in a range of values between 0.85 and 1
- The number of actions is set to 100
- The number of players is set to three

Statistics analysed:
- For SPGD:
  - PGD_S7_isNash[l, 0, t]: True if, in the t-th test with the l-th value of lambda, SPGD converges to a NE
  - PGD_S7_numIte[l, 0, t]: number of steps to convergence (or cycle) of SPGD in the t-th test with the l-th value of lambda
  - PGD_S7_valFin[l, 0, t]: final payoff reached by SPGD in the t-th test with the l-th value of lambda
  - PGD_S7_valAvg[l, 0, t]: average payoff along the trajectory of SPGD in the t-th test with the l-th value of lambda
  - PGD_S7_time[l, 0, t]: time to run SPGD in the t-th test with the l-th value of lambda
  - Remark: SPGD is automatically stopped at the first checkpoint after 10 seconds of running time.
- For SBRD:
  - SBRD_S7_isNash[l, 0, t]: True if, in the t-th test with the l-th value of lambda, SBRD converges to a NE
  - SBRD_S7_numIte[l, 0, t]: number of steps to convergence (or cycle) of SBRD in the t-th test with the l-th value of lambda
  - SBRD_S7_val[l, 0, t]: NE payoff reached by SBRD in the t-th test with the l-th value of lambda if SBRD converges to a NE, or the average payoff along the cycle otherwise
  - SBRD_S7_time[l, 0, t]: time to run SBRD in the t-th test with the l-th value of lambda

# Experimental setup
samples = 1000
lambda_vals = [0.85+0.09999999999*0.25*i for i in range(7)] # List of lambda values to test
A_vals = [100]

# Outputs to Track
## Policy Gradient Dynamics
PGD_S7_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool) # {0,1} veridicity of Nash in Policy Gradient Dynamics
PGD_S7_numIte = np.zeros((len(lambda_vals), len(A_vals), samples)) # Number of iterations in Policy Gradient Dynamics
PGD_S7_valFin = np.zeros((len(lambda_vals), len(A_vals), samples)) # Final payoff for player 0 in Policy Gradient Dynamics
PGD_S7_valAvg = np.zeros((len(lambda_vals), len(A_vals), samples)) # Average payoff for player 0 in Policy Gradient Dynamics
PGD_S7_time = np.zeros((len(lambda_vals), len(A_vals), samples))   # Runtime in Policy Gradient Dynamics

## Best Response Dynamics
SBRD_S7_isNash = np.full((len(lambda_vals), len(A_vals), samples), False, dtype=bool) # {0,1} veridicity of Nash in Best Response Dynamics
SBRD_S7_numIte = np.zeros((len(lambda_vals), len(A_vals), samples)) # Number of iterations in Best Response Dynamics
SBRD_S7_val = np.zeros((len(lambda_vals), len(A_vals), samples)) # Final payoff for player 0 in Best Response Dynamics
SBRD_S7_time = np.zeros((len(lambda_vals), len(A_vals), samples))   # Runtime in Best Response Dynamics
max_A = A_vals[-1]  # Maximum number of actions (fixed at 100)
np.random.seed(2025) #We set the random seed for reproducibility 

T_0 = time()  # Start the global timer

for count_lam, lam in enumerate(lambda_vals):  # Loop over correlation values
    for i in range(samples):  # Loop over sample repetitions
        # Generate a shared reward matrix for both algorithms
        G = generate_reward_matrix(3, max_A, lam)

        for count_A, A in enumerate(A_vals):  # Loop over action counts (fixed here)
            t_0 = time()  # Start timer for PGD
            PGD_it, PGD_avg_val, PGD_val = gradient_ascent_dynamics(G, A)
            t = time() - t_0  # Elapsed time

            PGD_S7_isNash[count_lam, count_A, i] = (PGD_it < 50_000 - 1)  # Converged?
            PGD_S7_numIte[count_lam, count_A, i] = PGD_it
            PGD_S7_valFin[count_lam, count_A, i] = PGD_val
            PGD_S7_valAvg[count_lam, count_A, i] = PGD_avg_val
            PGD_S7_time[count_lam, count_A, i] = t

            # === Best Response Dynamics (SBRD) ===
            t_0 = time()  # Start timer for SBRD
            SBRD_len, SBRD_it, SBRD_val_curr = best_response_dynamics(G, A)
            t = time() - t_0  # Elapsed time

            # Store SBRD results
            SBRD_S7_isNash[count_lam, count_A, i] = (SBRD_len == 1)  # Converged to pure NE?
            SBRD_S7_numIte[count_lam, count_A, i] = SBRD_it
            SBRD_S7_val[count_lam, count_A, i] = SBRD_val_curr
            SBRD_S7_time[count_lam, count_A, i] = t

# === Time Summary ===
Tot_time = time() - T_0
Tot_time_SBRD = np.sum(SBRD_S7_time)
Tot_time_PGD = np.sum(PGD_S7_time)
Tot_time_gener = Tot_time - Tot_time_SBRD - Tot_time_PGD

# Print runtime breakdown
print(f"Total time to run: {round(Tot_time, 1)} s")
print(f"Percentage of the time spent on random generation of matrix: {round((Tot_time_gener / Tot_time) * 100, 1)}%")
print(f"Percentage of the time spent on SBRD algorithm: {round((Tot_time_SBRD / Tot_time) * 100, 1)}%")
print(f"Percentage of the time spent on PGD algorithm: {round((Tot_time_PGD / Tot_time) * 100, 1)}%")
Total time to run: 21041.3 s
Percentage of the time spent on random generation of matrix: 1.2%
Percentage of the time spent on SBRD algorithm: 0.0%
Percentage of the time spent on PGD algorithm: 98.7%

As before, we see that SBRD takes less than 0.1% of the total running time; the largest share is taken by SPGD. As before, SPGD is stopped at the first checkpoint after 10 seconds if convergence is not reached by then.

We first plot the average time as before. Notice that the scale is logarithmic!

compare_algorithms(PGD_varName="S7_time", SBRD_varName="S7_time", title="", mean_1_name="SPGD±2SE", expnr="7", mean_2_name="SBRD±2SE", yLabel="Seconds", axis=1, variance=True, isPositive=True, log=True, legend_loc="upper right")

We now plot the average value of the equilibria attained by the two algorithms. As mentioned above, both converge with high probability in this regime, so these averages are statistically meaningful.

compare_algorithms(PGD_varName="S7_valFin", SBRD_varName="S7_val", title="", expnr="7", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Value", axis=1, variance=True, isPositive=True, only_if_conv=True, legend_loc="upper left")

We plot the average number of steps to convergence (to a NE or to a cycle)

compare_algorithms(PGD_varName="S7_numIte", SBRD_varName="S7_numIte", title="", mean_1_name="SPGD±2SE",expnr="7", mean_2_name="SBRD±2SE", yLabel="Steps", axis=1, variance=True, isPositive=True, log=True, legend_loc="upper right")

And finally, as before we compare the average final value (equilibrium or not) of SBRD against the average value attained by SPGD in its trajectory.

compare_algorithms(PGD_varName="S7_valAvg", SBRD_varName="S7_val", title="", expnr="7", mean_1_name="SPGD±2SE", mean_2_name="SBRD±2SE", yLabel="Value", axis=1, variance=True, isPositive=True, legend_loc="upper right")