In [1]:
import numpy as np

In [2]:
class ApplicationSimulator:
    def __init__(self, n_schools: int):
        self.n_schools = n_schools
        print("Initialised with ", n_schools, "schools")
    def generate_applications(self, n_students: int):
        pass
    def generate_decisions(self):
        pass
    def generate_data(self, n_students: int):
        self.generate_applications(n_students)
        self.generate_decisions()
        return self.application, self.decision


In [3]:
# In this model, everything is independent
# The students apply to schools independently 
class CompletelyIndependentApplication(ApplicationSimulator):
    def __init__(self, n_schools: int):
        super().__init__(n_schools)
        self.application_probability = np.random.dirichlet(np.ones(self.n_schools))
        print("Student apply with these probabilities:", self.application_probability)
        self.acceptance_probability = np.random.uniform(size = self.n_schools)
        print("Each school accepts students these probabilities:", self.acceptance_probability)
    def generate_applications(self, n_students: int):
        self.n_students = n_students
        self.application = np.zeros(n_students)
        for s in range(self.n_students):
            self.application[s] = np.random.choice(self.n_schools, p=self.application_probability)
    def generate_decisions(self):
        n_students = len(self.application)
        self.decision = np.zeros(n_students)
        for s in range(n_students):
            school = int(self.application[s])
            p = self.acceptance_probability[school]
            print(school)
            d = np.random.choice(2, p = [1 - p, p])
            self.decision[s] = d


In [50]:
test = CompletelyIndependentApplication(3)
applications, decisions = test.generate_data(10)
print(applications, decisions)

Initialised with  3 schools
Student apply with these probabilities: [0.07863284 0.62426635 0.29710081]
Each school accepts students these probabilities: [0.30233537 0.38497953 0.24272126]
1
1
1
2
1
1
1
2
1
1
[1. 1. 1. 2. 1. 1. 1. 2. 1. 1.] [0. 0. 1. 1. 0. 1. 0. 1. 0. 0.]


In [53]:
# In this model, everything is independent
# The students apply to schools independently 
class GenderDependentApplication(ApplicationSimulator):
    def __init__(self, n_schools: int):
        super().__init__(n_schools)
        self.application_probability_male = np.random.dirichlet(np.ones(self.n_schools))
        self.application_probability_female = np.random.dirichlet(np.ones(self.n_schools))
        print("Student apply with these probabilities:",
              self.application_probability_male,
              self.application_probability_female)
        self.acceptance_probability = np.random.uniform(size = self.n_schools)
        print("Each school accepts students these probabilities:", self.acceptance_probability)
    def generate_applications(self, n_students: int):
        self.gender = np.random.choice(2, size = n_students)
        self.application = np.zeros(n_students)
        for s in range(n_students):
            if (self.gender[s]==0):
                self.application[s] = np.random.choice(self.n_schools, p=self.application_probability_male)
            else:
                self.application[s] = np.random.choice(self.n_schools, p=self.application_probability_female)
    def generate_decisions(self):
        n_students = len(self.application)
        self.decision = np.zeros(n_students)
        for s in range(n_students):
            school = int(self.application[s])
            p = self.acceptance_probability[school]
            print(school)
            d = np.random.choice(2, p = [1 - p, p])
            self.decision[s] = d
test = GenderDependentApplication(3)
test.generate_data(10)

Initialised with  3 schools
Student apply with these probabilities: [0.05169466 0.69216342 0.25614192] [0.47384631 0.08933192 0.43682177]
Each school accepts students these probabilities: [0.08385708 0.90518919 0.17753453]
2
2
2
2
2
0
2
0
1
2


(array([2., 2., 2., 2., 2., 0., 2., 0., 1., 2.]),
 array([1., 0., 0., 0., 0., 0., 0., 0., 1., 0.]))

In [59]:
# In this model, everything is independent
# The students apply to schools independently 
class DependentApplication(ApplicationSimulator):
    def __init__(self, n_schools: int):
        super().__init__(n_schools)
        self.application_probability_male = np.random.dirichlet(np.ones(self.n_schools))
        self.application_probability_female = np.random.dirichlet(np.ones(self.n_schools))
        print("Student apply with these probabilities:",
              self.application_probability_male,
              self.application_probability_female)
        self.acceptance_probability_male= np.random.uniform(size = self.n_schools)
        self.acceptance_probability_female = np.random.uniform(size = self.n_schools)
        print("Each school accepts students these probabilities:", 
              self.acceptance_probability_male,
                self.acceptance_probability_female)

    def generate_applications(self, n_students: int):
        self.gender = np.random.choice(2, size = n_students)
        self.application = np.zeros(n_students)
        for s in range(n_students):
            if (self.gender[s]==0):
                self.application[s] = np.random.choice(self.n_schools, p=self.application_probability_male)
            else:
                self.application[s] = np.random.choice(self.n_schools, p=self.application_probability_female)
    def generate_decisions(self):
        n_students = len(self.application)
        self.decision = np.zeros(n_students)
        for s in range(n_students):
            school = int(self.application[s])
            if (self.gender[s]==0):
                p = self.acceptance_probability_male[school]
            else:
                p = self.acceptance_probability_female[school]
            print(school)
            d = np.random.choice(2, p = [1 - p, p])
            self.decision[s] = d
test = DependentApplication(3)
test.generate_data(10)

Initialised with  3 schools
Student apply with these probabilities: [0.36066168 0.21538529 0.42395303] [0.42028408 0.53875249 0.04096343]
Each school accepts students these probabilities: [0.11959595 0.36897535 0.99385479] [0.08732052 0.88390126 0.49654598]
1
1
0
0
0
2
1
1
0
2


(array([1., 1., 0., 0., 0., 2., 1., 1., 0., 2.]),
 array([1., 1., 0., 1., 0., 1., 1., 1., 0., 1.]))

In [68]:
# Here the students get a grade, dependent on the gender
# Then the outcome also depends on the grade
class GradeDependentApplication(ApplicationSimulator):
    def __init__(self, n_schools: int):
        super().__init__(n_schools)
        self.application_probability_male = np.random.dirichlet(np.ones(self.n_schools))
        self.application_probability_female = np.random.dirichlet(np.ones(self.n_schools))
        print("Student apply with these probabilities:",
              self.application_probability_male,
              self.application_probability_female)
        self.acceptance_threshold = np.random.uniform(size = self.n_schools)*4
        print("Each school accepts students these thresholds:", self.acceptance_threshold)
    def generate_applications(self, n_students: int):
        self.gender = np.random.choice(2, size = n_students)
        self.application = np.zeros(n_students)
        self.grade = np.zeros(n_students)
        for s in range(n_students):
            if (self.gender[s]==0):
                self.application[s] = np.random.choice(self.n_schools, p=self.application_probability_male)
                self.grade[s] = np.random.choice(5, p=[0.1, 0.2, 0.3, 0.2, 0.2])
            else:
                self.application[s] = np.random.choice(self.n_schools, p=self.application_probability_female)
                self.grade[s] = np.random.choice(5, p=[0.1, 0.1, 0.25, 0.3, 0.25])
        print("Grades:", self.grade)
        
    def generate_decisions(self):
        n_students = len(self.application)
        self.decision = np.zeros(n_students)
        for s in range(n_students):
            school = int(self.application[s])
            if (self.grade[s] > self.acceptance_threshold[school]):
                self.decision[s] = 1
            else:
                self.decision[s] = 0
                
test = GradeDependentApplication(3)
test.generate_data(10)

Initialised with  3 schools
Student apply with these probabilities: [0.27280212 0.07638029 0.65081759] [0.42975929 0.13928883 0.43095188]
Each school accepts students these thresholds: [0.40696758 1.86393788 3.64350478]
Grades: [2. 2. 2. 2. 2. 2. 2. 2. 4. 2.]


(array([2., 2., 2., 2., 2., 0., 1., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 1., 1., 1., 1., 1.]))

In [69]:
import pandas as pd
acceptance_rates = np.array([[62, 82], [63, 68], [37, 34], [33, 35], [28, 24], [6, 7]])

In [6]:
application_rates = np.array([[0.3, 0.3, 0.2, 0.1, 0.05, 0.05], [0.05, 0.05, 0.1, 0.2, 0.3, 0.3]])


In [7]:
print("M acceptance: ", np.dot(acceptance_rates[:,0],application_rates[0]))
print("F acceptance: ", np.dot(acceptance_rates[:,1],application_rates[1]))

M acceptance:  49.89999999999999
F acceptance:  27.200000000000003


In [8]:
application_rates

array([[0.3 , 0.3 , 0.2 , 0.1 , 0.05, 0.05],
       [0.05, 0.05, 0.1 , 0.2 , 0.3 , 0.3 ]])