In [26]:
from functools import partial

from pref_voting.profiles_with_ties import *
from pref_voting.voting_methods import *
from pref_voting.analysis import *
from pref_voting.generate_profiles import *
from pref_voting.generate_weighted_majority_graphs import *

from pref_voting.utility_profiles import UtilityProfile

from pref_voting.generate_utility_profiles import *
from pref_voting.generate_utility_profiles import *
from pref_voting.utility_methods import *

from pref_voting.spatial_profiles import SpatialProfile
from pref_voting.generate_spatial_profiles import *
from pref_voting.utility_functions import *
from pref_voting.probabilistic_methods import *
from tqdm.notebook import tqdm
import nashpy as nash
import numpy as np
import random2 as random
from multiprocess import Pool, cpu_count, current_process
from numba import njit, float32

In [27]:
# np.nanmean skips NaN entries, so all three averages agree with the plain
# mean of the non-NaN values.
print(
    np.mean([1, 1, 1, 0, 0]),
    np.nanmean([1, 1, 1, 0, 0, np.nan]),
    np.nanmean([1, 1, np.nan, np.nan, 0, 0, np.nan, 1]),
    sep="\n",
)


0.6
0.6
0.6


In [28]:
# Map nonzero entries to 1 and zero entries to NaN.
ms = np.array([0.0, 1.0, 0.0, 0.0])
np.where(ms == 0, np.nan, 1)

array([nan,  1., nan, nan])

In [29]:
# Every comparison against NaN is False, so an all-NaN array never exceeds the threshold.
(np.full(3, np.nan) > 0.5).any()

False

In [30]:
# An ordering comparison with NaN is False; this value is not displayed
# because it is not the last expression of the cell.
np.nan < 0.1

# Check whether an array contains any NaN.
np.any(np.isnan(np.full(3, np.nan)))

True

In [31]:
# Draw one random profile from the "Spatial" model and show it; the displayed
# table has 4 voter columns, each ranking candidates 0-2.
# NOTE(review): (3, quadratic_utility) is presumably (number of spatial
# dimensions, utility function) -- confirm against the pref_voting docs.
prof = generate_profile(3, 4, "Spatial", (3, quadratic_utility))

prof.display()

def gen_samples(voting_methods, num_profiles=1):
    """
    Sample winning-set sizes for each voting method.

    Draws ``num_profiles`` random "Spatial" profiles via
    ``generate_profile(5, 10, ...)`` and evaluates every voting method on the
    same profiles.

    Args:
        voting_methods (list): Callables that take a profile and return a
            collection of winners.
        num_profiles (int, default=1): Number of profiles to draw.

    Returns:
        A numpy array with one row per voting method and one column per
        profile, holding ``len(vm(profile))``.
    """
    # All methods are scored on one shared batch of profiles.
    sampled_profiles = []
    for _ in range(num_profiles):
        sampled_profiles.append(
            generate_profile(5, 10, "Spatial", (3, quadratic_utility))
        )

    rows = []
    for vm in voting_methods:
        rows.append([len(vm(profile)) for profile in sampled_profiles])

    return np.array(rows)

def is_condorcet_consistent(prof, vm):
    """
    Check whether ``vm`` elects exactly the Condorcet winner of ``prof``.

    Args:
        prof: An object with a ``condorcet_winner()`` method that returns the
            Condorcet winner, or None if there is none.
        vm: A callable that takes ``prof`` and returns the list of winners.

    Returns:
        1 if ``vm(prof)`` equals the one-element list containing the Condorcet
        winner, 0 if it does not, and ``np.nan`` when ``prof`` has no
        Condorcet winner (``vm`` is not called in that case).
    """
    condorcet_winner = prof.condorcet_winner()

    # Candidate 0 is a valid winner, so test against None explicitly.
    if condorcet_winner is None:
        return np.nan

    return int([condorcet_winner] == vm(prof))

def gen_samples2(voting_methods, num_profiles=1):
    """
    Sample Condorcet-consistency indicators for each voting method.

    Draws ``num_profiles`` random "Spatial" profiles via
    ``generate_profile(5, 11, ...)`` and applies ``is_condorcet_consistent``
    to every (voting method, profile) pair.

    Args:
        voting_methods (list): Callables that take a profile and return the
            list of winners.
        num_profiles (int, default=1): Number of profiles to draw.

    Returns:
        A numpy array with one row per voting method and one column per
        profile, holding the value returned by ``is_condorcet_consistent``.
    """
    # All methods are scored on one shared batch of profiles.
    sampled_profiles = []
    for _ in range(num_profiles):
        sampled_profiles.append(
            generate_profile(5, 11, "Spatial", (3, quadratic_utility))
        )

    rows = []
    for vm in voting_methods:
        rows.append(
            [is_condorcet_consistent(profile, vm) for profile in sampled_profiles]
        )

    return np.array(rows)

+---+---+---+---+
| 1 | 1 | 1 | 1 |
+---+---+---+---+
| 2 | 1 | 0 | 0 |
| 1 | 0 | 2 | 1 |
| 0 | 2 | 1 | 2 |
+---+---+---+---+


In [32]:
# Smoke test: one row per voting method, one column per sampled profile.
gen_samples2(voting_methods=[plurality, split_cycle], num_profiles=10)

array([[1, 1, 1, 1, 1, 1, 1, 1, 0, 1],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])

In [37]:
# def estimated_variance_of_sampling_dist(
#     mean_for_each_vm, 
#     values_for_each_vm):
#     # values_for_each_vm is a 2d numpy array

#     m = values_for_each_vm.shape[1]

#     row_means_reshaped = mean_for_each_vm[:, np.newaxis]
#     return  (1/(m*(m-1))) * np.sum((values_for_each_vm - row_means_reshaped)**2, axis=1)

# def estimated_std_error(
#     mean_for_each_vm, 
#     values_for_each_vm):
#     # values_for_each_vm is a 2d numpy array
#     return np.sqrt(estimated_variance_of_sampling_dist(mean_for_each_vm, values_for_each_vm))


def estimated_variance_of_sampling_dist( 
    values_for_each_experiment,
    mean_for_each_experiment=None):
    """
    Estimate the variance of the sampling distribution of the mean for each row.

    For a row with n non-NaN values x_i and mean m, the estimate is
    sum((x_i - m)**2) / (n * (n - 1)). NaN entries are ignored.

    Args:
        values_for_each_experiment: A 2d array-like with one row per
            experiment and one column per trial; entries may be NaN.
        mean_for_each_experiment (default=None): Optional 1d array-like of row
            means. If None, the means are computed with np.nanmean.

    Returns:
        A 1d numpy array with one estimate per row. The entry is NaN for rows
        with fewer than two non-NaN values.
    """
    values = np.asarray(values_for_each_experiment, dtype=float)

    # Compute the means only when the caller did not supply them.
    if mean_for_each_experiment is None:
        mean_for_each_experiment = np.nanmean(values, axis=1)
    else:
        mean_for_each_experiment = np.asarray(mean_for_each_experiment, dtype=float)

    num_val_for_each_exp = np.sum(~np.isnan(values), axis=1)
    denominators = num_val_for_each_exp * (num_val_for_each_exp - 1)

    row_means_reshaped = mean_for_each_experiment[:, np.newaxis]
    sum_sq_deviations = np.nansum((values - row_means_reshaped) ** 2, axis=1)

    # Rows with a zero denominator are masked to NaN below, so silence the
    # divide-by-zero / invalid warnings their intermediate values would raise.
    with np.errstate(divide="ignore", invalid="ignore"):
        scaled = (1 / denominators) * sum_sq_deviations

    return np.where(denominators != 0, scaled, np.nan)

def estimated_std_error(values_for_each_experiment, mean_for_each_experiment=None):
    """
    Estimated standard error of the mean for each row of a 2d array.

    This is the square root of the value returned by
    estimated_variance_of_sampling_dist; both arguments are forwarded to that
    function unchanged.

    Args:
        values_for_each_experiment: A 2d numpy array with one row per experiment.
        mean_for_each_experiment (default=None): Row means, forwarded as the
            keyword argument of the same name.

    Returns:
        A numpy array with one estimated standard error per row.
    """
    sampling_variance = estimated_variance_of_sampling_dist(
        values_for_each_experiment,
        mean_for_each_experiment=mean_for_each_experiment,
    )
    return np.sqrt(sampling_variance)

In [38]:
def means_with_estimated_standard_error(
        generate_samples, 
        max_std_error, 
        initial_trials=1000, 
        step_trials=1000,
        min_num_trials=10_000, 
        max_num_trials=None,
        verbose=False
        ):
    """
    For each list of numbers produced by generate_samples, returns the means, the [estimated standard error](https://en.wikipedia.org/wiki/Standard_error) of the means, the variance of the samples, and the total number of trials.  

    Uses the estimated_variance_of_sampling_dist (as described in [https://berkeley-stat243.github.io/stat243-fall-2023/units/unit9-sim.html](https://berkeley-stat243.github.io/stat243-fall-2023/units/unit9-sim.html)) and estimated_std_error functions. 

    Sampling continues in batches until every estimated standard error is a number that is at most max_std_error and at least min_num_trials trials have been run, or until max_num_trials is reached. NaN samples are ignored when computing the statistics.

    Note: if max_num_trials is None and some estimated standard error never becomes a number (e.g., a row whose samples are all NaN), the loop does not terminate. Pass max_num_trials to bound the run.
    
    Args:
        generate_samples (function): A function that generates samples. It should take a single keyword argument num_profiles and return a 2d numpy array of samples (one row per experiment, one column per trial).
        max_std_error (float): The desired estimated standard error for the mean of each sample.
        initial_trials (int, default=1000): The number of samples to initially generate.
        step_trials (int, default=1000): The number of samples to generate in each step.
        min_num_trials (int, default=10000): The minimum number of trials to run.
        max_num_trials (int, default=None): If not None, then the maximum number of trials to run. Batch sizes (including the initial batch) are reduced as needed so that the total never exceeds this value.
        verbose (bool, default=False): If True, then print progress information.

    Returns:
        A tuple (means, est_std_errors, variances, num_trials) where means is an array of the means of the samples, est_std_errors is an array of estimated standard errors of the samples,  variances is an array of the variances of the samples (np.nanvar, i.e. population variance), and num_trials is the total number of trials.

    """

    def _summarize(all_samples):
        # Per-row mean, variance and estimated standard error, ignoring NaNs.
        row_means = np.nanmean(all_samples, axis=1)
        row_variances = np.nanvar(all_samples, axis=1)
        row_std_errors = estimated_std_error(
            all_samples, 
            mean_for_each_experiment=row_means)
        return row_means, row_variances, row_std_errors

    def _report(header, trials_so_far, std_errors):
        # Print how many (and which) standard errors are still above the target.
        too_large = std_errors > max_std_error
        print(header, trials_so_far)
        print(f"Remaining estimated standard errors greater than {max_std_error}:", np.sum(too_large))
        print(f"Estimated standard errors that are still greater than {max_std_error}:\n",std_errors[too_large])

    # Respect the cap on the very first batch as well.
    num_trials = initial_trials if max_num_trials is None else min(initial_trials, max_num_trials)

    # samples is a 2d numpy array
    samples = generate_samples(num_profiles = num_trials)
    means, variances, est_std_errors = _summarize(samples)

    if verbose:
        _report("Initial number of trials:", num_trials, est_std_errors)

    while (
        (
            np.isnan(est_std_errors).any() 
            or np.any(est_std_errors > max_std_error) 
            or (num_trials < min_num_trials)
        ) 
        and (max_num_trials is None or num_trials < max_num_trials)
    ):
        if verbose:
            _report("Number of trials:", num_trials, est_std_errors)

        # Shrink the final batch so the total never overshoots max_num_trials.
        batch_size = step_trials
        if max_num_trials is not None:
            batch_size = min(step_trials, max_num_trials - num_trials)

        new_samples = generate_samples(num_profiles = batch_size)

        samples = np.concatenate((samples, new_samples), axis=1)

        num_trials += batch_size

        means, variances, est_std_errors = _summarize(samples)

    return means, est_std_errors, variances, num_trials

In [40]:
# Estimate, for each voting method, how often it elects exactly the Condorcet
# winner on sampled profiles, drawing more profiles until every estimated
# standard error is at most 0.001.
voting_methods = [plurality, split_cycle, bracket_voting, stable_voting]

# Bind the method list so the sampler only needs `num_profiles`.
gsamples = partial(gen_samples2, voting_methods)

means_with_estimated_standard_error(
    gsamples, 
    max_std_error=0.001, 
    verbose=True)

Initial number of trials: 1000
Remaining estimated standard errors greater than 0.001: 2
Estimated standard errors that are still greater than 0.001:
 [0.0151113  0.00387506]
Number of trials: 1000
Remaining estimated standard errors greater than 0.001: 2
Estimated standard errors that are still greater than 0.001:
 [0.0151113  0.00387506]
Number of trials: 2000
Remaining estimated standard errors greater than 0.001: 2
Estimated standard errors that are still greater than 0.001:
 [0.01084164 0.00228446]
Number of trials: 3000
Remaining estimated standard errors greater than 0.001: 2
Estimated standard errors that are still greater than 0.001:
 [0.00890166 0.00208502]
Number of trials: 4000
Remaining estimated standard errors greater than 0.001: 2
Estimated standard errors that are still greater than 0.001:
 [0.00770203 0.00180247]
Number of trials: 5000
Remaining estimated standard errors greater than 0.001: 2
Estimated standard errors that are still greater than 0.001:
 [0.00688602 0.

(array([0.65162032, 1.        , 0.98602056, 1.        ]),
 array([0.00099835, 0.        , 0.00024601, 0.        ]),
 array([0.22701128, 0.        , 0.01378402, 0.        ]),
 239000)

In [25]:
# Sanity check: the population variance of a balanced 0/1 sample is 0.25.
np.var(np.repeat([1, 0], 1000))

0.25