导入库

In [1]:
import random
import math
import copy
from collections import defaultdict
import csv
import matplotlib

定义类

In [2]:
class Group:
    def __init__(self,group_id):
        self.groupmate = []
        self.group_id = group_id
        self.score = 0

In [3]:
class Student:
    def __init__(self, tutorial_group, student_id, school, name, gender, cgpa):
        self.tutorial_group = tutorial_group
        self.student_id = student_id
        self.school = school
        self.name = name
        self.gender = gender
        self.cgpa = float(cgpa)

# 分TutorialGroup 类
class TutorialGroup:
    def __init__(self, group_id):
        self.group_id = group_id
        self.groupmate = []  # Student对象列表
        self.weights = {}    # 存放每组的熵权 {'School':x,'Gender':y,'CGPA':z}

# 熵权法计算权重
def entropy_weight(data_matrix):
    n = len(data_matrix)#学生数量
    m = len(data_matrix[0])#指标数量
    norm = []
    for j in range(m):
        col = [data_matrix[i][j] for i in range(n)]
        min_val, max_val = min(col), max(col)
        norm_col = [(x - min_val) / (max_val - min_val + 1e-9) for x in col]
        norm.append(norm_col)#熵权法标准化
    norm_T = list(zip(*norm)) # 转置为每行单位为学生存放数据

    P = []  # 比例矩阵 P
    for j in range(m):
        col = [norm_T[i][j] for i in range(n)]
        col_sum = sum(col) + 1e-9
        P.append([x / col_sum for x in col])

    E = []# 熵 E
    for j in range(m):
        e = -sum([p * math.log(p + 1e-9) for p in P[j]]) / math.log(n)
        E.append(e)

    D = [1 - e for e in E] # 冗余度 D & 权重 W
    W = [d / sum(D) for d in D]
    return W

#文字指标转换为数字计算权重
def encode_category(value, mapping_dict):
    if value not in mapping_dict:
        mapping_dict[value] = len(mapping_dict) + 1#编码
    return mapping_dict[value]


dataset = {}   # {组号: TutorialGroup 对象}
students = []

with open("records.csv", newline='', encoding='utf-8') as f:#防止空行，保证格式正确
    reader = csv.DictReader(f)#读取csv文件，并自动转成字典
    for row in reader:
        s = Student(
            tutorial_group=row["Tutorial Group"],
            student_id=row["Student ID"],
            school=row["School"],
            name=row["Name"],
            gender=row["Gender"],
            cgpa=row["CGPA"]
        )
        students.append(s)

        if s.tutorial_group not in dataset:#存组号
            dataset[s.tutorial_group] = TutorialGroup(s.tutorial_group)#创建组存组号
        dataset[s.tutorial_group].groupmate.append(s)#选组，找组员，添加

print(f" {len(students)} students，{len(dataset)} tutorial group")#多少学生&组

# 计算每组的 School, Gender, CGPA 权重
school_map, gender_map = {}, {}
for group_id, tg in dataset.items():#key&value对应的值
    data_matrix = []
    for s in tg.groupmate:
        school_code = encode_category(s.school, school_map)
        gender_code = encode_category(s.gender, gender_map)
        cgpa_value = s.cgpa
        data_matrix.append([school_code, gender_code, cgpa_value])#文字指标转数字
    weights = entropy_weight(data_matrix)
    tg.weights = {"School": weights[0],"Gender": weights[1],"CGPA": weights[2]}
    #print(group_id)
    #print({"School": weights[0],"Gender": weights[1],"CGPA": weights[2]})
    #print(f"{group_id} 权重: School={weights[0]:.3f}, Gender={weights[1]:.3f}, CGPA={weights[2]:.3f}")
#print(TutorialGroup('G-1').group_id)

 6000 students，120 tutorial group


交换退火

交换函数

伪代码：
```
FUNCTION exchange_two(groups):  
    ID1,ID2 = randomly choose 2 groups from group  
    pick1 = selected group1  
    pick2 = selected group2  
    student1,student2 = randomly choose 1 student from each selected group  
    exchange student1 and student2  
    RETURN pick1,pick2,student1,student2

In [4]:
def exchange_two(groups,tutorialgroup_id,number_of_groups):
    ID1,ID2 = random.sample(range(number_of_groups),2)  #随机选取两个组
    pick1 = groups[ID1]
    pick2 = groups[ID2]  #定义pick1 pick2为选出的两个组
    previous_score = calc_score_after_total(ID1,ID2,groups,tutorialgroup_id)
    student_id1 = random.randint(0,len(pick1.groupmate)-1)
    student_id2 = random.randint(0,len(pick2.groupmate)-1)  #在两个组里随机各选一个人
    student1 = pick1.groupmate[student_id1]
    student2 = pick2.groupmate[student_id2]
    pick1.groupmate[student_id1],pick2.groupmate[student_id2] = pick2.groupmate[student_id2],pick1.groupmate[student_id1]
      #交换两个人
    return ID1,ID2,previous_score,student1,student2  #这个返回什么后面看需求吧

计算交换后的分数

伪代码：  
```
FUNCTION calc_score_after_total(first_group,second_group,Tutorialgroup_list,tutorialgroup_id):
    return first_group's score + second_group's score
    

In [5]:
def calc_score_after_total(ID1,ID2,Tutorialgroup_list,tutorialgroup_id):
    return calc_score_v1(Tutorialgroup_list[ID1].groupmate,tutorialgroup_id) + calc_score_v1(Tutorialgroup_list[ID2].groupmate,tutorialgroup_id)

判断是否接受

伪代码:
```
FUNCTION accept_change(delta,temperature):  
    IF delta > 0:
        RETURN True
    ELSE:
        sta = exp(delta/temperature)
        IF:
            random_value < sta
                RETURN TRUE
        ElSE:
            RETURN False            

In [6]:
def accept_change(delta,temperature):  #delta和temperature在后面退火的函数再说，delta就是分数差
    if delta > 0:
        return True   #交换后变好，直接接受
    else:
        sta = math.exp(delta/temperature)
        return random.random() < sta   #变坏后有一定几率接受

撤销交换函数

In [7]:
def revert_change(groups,ID1,ID2,student1,student2):
    pick1 = groups[ID1]
    pick2 = groups[ID2]
    id1 = pick1.groupmate.index(student2)
    id2 = pick2.groupmate.index(student1)
    pick1.groupmate[id1],pick2.groupmate[id2] = student1,student2
    #return groups

退火主函数

伪代码：
```
FUNCTION annealing(groups,tutorialgroup_id,initial_temp=100 (#initial temperature),cooling_rate=0.99 (#the speed of cooling),min_tem (#min temperature,when temperature<min_temperature,break),max_iter=1000 (#it can iterate at most 1000 times)):  
    current_groups = copy_of_groups
    current_score = the_group_score_obtained_from_the_previous_process  
    initialize temperature = 0  
    initialize iteration = 0  
    initialize the num of consecutive exchanges that are all not accepted = 0  
    WHILE temperature is not below min_tem AND iteration has not exceeded max_iter:  
        iteration = iteration+1  
        initialize new_groups = current_groups  
        initialize new_score = current_score  
        use exchange_two to exchange two student  
        calculate new_score of the tutorial group  
        caculate the score difference before and after  
        IF change is accepted:  
            current_groups = new_groups  
            currrent_score = new_score  
            let no_change_count = 0 again
        ELSE:  
            no_change_count = no_change_count +1   
    temperature cool down  
    RETURN current_groups,current_score      

In [8]:
def annealing(tutorialgroup_id,group_size,number_of_groups,initial_tem=100,cooling_rate=0.99,min_tem=0.01, max_iter=1000):  #这些都是自己定义的数据，后面商量一下定为多少
    current_groups = stratified_grouping(tutorialgroup_id,group_size,number_of_groups)  #初始化
    temperature = initial_tem
    iteration = 0  #计算迭代次数
    no_change_count = 0  #计算连续交换后都不接受的情况发生的次数
    while temperature > min_tem and iteration < max_iter and no_change_count < 50:
        iteration += 1
        ID1,ID2,current_score,student1,student2 = exchange_two(current_groups,tutorialgroup_id,number_of_groups)#交换
        new_score = calc_score_after_total(ID1,ID2,current_groups,tutorialgroup_id)
        delta = current_score - new_score  #计算调换后的分数和delta
        if accept_change(delta,temperature):
            #current_groups = new_groups
            #current_score = new_score  #也就是说。ori_groups保留原始数据永远不动，new_groups储存交换之后的分组信息，不管交换是否被接受，current_groups在确定交换接受后把这个新的分组储存进去
            no_change_count = 0 #因为是连续不接受，所以一旦接受就重新变成0
        else:
            no_change_count += 1
            revert_change(current_groups,ID1,ID2,student1,student2)
        temperature *= cooling_rate  #每次循环后降温一次
    return current_groups

贪心算法部分

伪代码:
```
FUNCTION calc_score_v1(the list of 5 student, which tutorial group are they from):
    quanzhong = the previous calculated weights`
    calculate the gender_score:
        iterate through the members of the five_person group and find the current number of males
        calculate the current proportion of males in the five-person group
        iterate through the members of the tutorial group and find the current number of males
        calculate the current proportion of males in the tutorial group
        the gender_score = the absolute value of proportion's differnce

    calculate the school_diversity:
        find the current number of schools of the group
        find the current number of student of the group
        school_diversity = 1-school_number/student_number

    caculate the cgpa_score:
        calculate the average cgpa of the tutorial group
        calculate the average cgpa of the group
        cgpa_score = the absolute value of (average_cgpa_in_group - average_cgpa_in_tutorialgroup)

    caculate the score:
        score = gender_score*the_weight_of_gender_score + school_diversity*the_weight_of_school_diversity + cgpa_score*the_weigh_of_cgpa_score

In [9]:
def calc_score_v1(given_list,tutorialgroup_id):
    quanzhong = dataset[tutorialgroup_id].weights
    #print(TutorialGroup('G-1').weights)
    #print(quanzhong)
    male_count = 0
    #计算性别均衡性分数
    #计算当前小组性别比
    for student in given_list:
        if student.gender == 'Male':
            male_count += 1
    if given_list:
        male_ratio = male_count / len(given_list)
    else:
        male_ratio = 0
    #计算Tutorial group的性别比
    male_count = 0
    for student in dataset[tutorialgroup_id].groupmate:
        if student.gender == 'Male':
            male_count += 1
    target_male_ratio = male_count / 120
    gender_score = abs(male_ratio - target_male_ratio)
    #计算专业均衡性分数
    school_in_group = len(set(stu.school for stu in given_list))
    if given_list:
        school_diversity = school_in_group / len(given_list)
    else:
        school_diversity = 0
    school_score = 1-school_diversity
    #计算CGPA均衡性分数
    total_cgpa = 0
    for student in dataset[tutorialgroup_id].groupmate:
        total_cgpa += student.cgpa
    target_average_cgpa = total_cgpa / 120
    current_total_cgpa = 0
    for student in given_list:
        current_total_cgpa += student.cgpa
    if given_list:
        current_average_cgpa = current_total_cgpa / len(given_list)
    else:
        current_average_cgpa = 0
    cgpa_score = abs(current_average_cgpa - target_average_cgpa)
    #根据权重计算总分
    score = gender_score * quanzhong['Gender'] + school_score * quanzhong['School'] + cgpa_score * quanzhong['CGPA']
    return score
'''
def calc_score_v2(group_id,tutorialgroup_id):
    quanzhong = dataset[tutorialgroup_id].weights
    male_count = 0
    # 计算性别均衡性分数
    # 计算当前小组性别比
    given_list = Group(group_id).groupmate
    for student in given_list:
        if student.gender == 'Male':
            male_count += 1
    male_ratio = male_count / len(given_list)
    # 计算Tutorial group的性别比
    male_count = 0
    for student in dataset[tutorialgroup_id].groupmate:
        if student.gender == 'Male':
            male_count += 1
    target_male_ratio = male_count / 120
    gender_score = abs(male_ratio - target_male_ratio)
    # 计算专业均衡性分数
    school_in_group = len(set(stu.school for stu in given_list))
    school_diversity = school_in_group / len(given_list)
    school_score = 1 - school_diversity
    # 计算CGPA均衡性分数
    total_cgpa = 0
    for student in dataset[tutorialgroup_id].groupmate:
        total_cgpa += student.cgpa
    target_average_cgpa = total_cgpa / 120
    current_total_cgpa = 0
    for student in given_list:
        current_total_cgpa += student.cgpa
    current_average_cgpa = current_total_cgpa / len(given_list)
    cgpa_score = abs(current_average_cgpa - target_average_cgpa)
    score = gender_score * quanzhong['Gender'] + school_score * quanzhong['School'] + cgpa_score * quanzhong['CGPA']
    return score
'''

"\ndef calc_score_v2(group_id,tutorialgroup_id):\n    quanzhong = dataset[tutorialgroup_id].weights\n    male_count = 0\n    # 计算性别均衡性分数\n    # 计算当前小组性别比\n    given_list = Group(group_id).groupmate\n    for student in given_list:\n        if student.gender == 'Male':\n            male_count += 1\n    male_ratio = male_count / len(given_list)\n    # 计算Tutorial group的性别比\n    male_count = 0\n    for student in dataset[tutorialgroup_id].groupmate:\n        if student.gender == 'Male':\n            male_count += 1\n    target_male_ratio = male_count / 120\n    gender_score = abs(male_ratio - target_male_ratio)\n    # 计算专业均衡性分数\n    school_in_group = len(set(stu.school for stu in given_list))\n    school_diversity = school_in_group / len(given_list)\n    school_score = 1 - school_diversity\n    # 计算CGPA均衡性分数\n    total_cgpa = 0\n    for student in dataset[tutorialgroup_id].groupmate:\n        total_cgpa += student.cgpa\n    target_average_cgpa = total_cgpa / 120\n    current_total_cgpa = 0\

贪心算法分层分组伪代码：
```
获取50名学生对象的列表
将 分层分组字典 的默认值设为:空列表
以 学生个体 遍历 学生列表:
    学生所属的层 = (学院，性别)
    分层分组字典的(学院，性别)键添加当前学生
每一层按成绩从高到低排序
建立储存分组结果的字典:{1到10:[小组1到10]}
建立储存每组分数的列表:[11个0] #列表第一个元素编号为0，故不用
建立按稀有度排序的分层分组列表：[((学院,性别),[学生1,学生2,......]),......]并将人数最少的层排在前面
以层，学生列表 遍历 分层分组列表:
    如果分配学生次数达到10，则退出
    寻找空的小组
    将当前层的第一名学生加入该小组
更新所有小组的均衡性分数并储存在分数列表中
将剩余40名学生按照稀有度→成绩的顺序储存在一个列表中
以学生个体遍历学生列表:
    遍历十个小组：
        尝试将该学生加入该小组
        计算该学生加入小组带来的分数增加量(增加量越小越好)
        找出分数增加量的最小值及其对应的小组
    将学生加入分数增加最少的小组
    #将该学生移出学生列表
建立10个小组类的对象
将学生分组信息储存在对象中并导出

In [10]:
def stratified_grouping(current_tutorialgroup,group_size,number_of_groups):
    students = dataset[current_tutorialgroup].groupmate #students格式：[student1,student2,......,student50]
    #创建分层分组字典stratified_groups，格式:{('学院','性别'):[student1,student2,......],......}
    stratified_groups = defaultdict(list)
    for student in students:
        key = (student.school,student.gender)
        stratified_groups[key].append(student)
    #每一层按成绩从高到低排序
    for key in stratified_groups:
        stratified_groups[key].sort(key=lambda x: x.cgpa, reverse=True)
    groups = {i : [] for i in range(1, number_of_groups +1)} #储存分组结果的字典
    groups_score = [0] * (number_of_groups + 1)
    #开始按稀有度分组
    sorted_layers = sorted(stratified_groups.items(),key=lambda x:len(x[1])) #此处x：(('学院','性别'),[student1,student2,...])；按稀有度排序
    count = 0 #记录分组次数，次数达到10后退出稀有度分组
    for layer_key, students in sorted_layers:
        if count >= number_of_groups:
            break
        if students: #该层非空：
            target = min(groups.keys(), key = lambda x: len(groups[x])) #寻找空的组，获取其编号
            target_student = students.pop(0) #从该层中拿走成绩最好的学生
            #stratified_groups[layer_key].remove(target_student)
            groups[target].append(target_student) #将该学生加入小组
            count += 1
            #print(f"将层级{layer_key}的学生分到第{target}组")
    #更新分数
    for i in range(1,number_of_groups+1):
        groups_score[i] = calc_score_v1(groups[i],current_tutorialgroup)
    #开始按贪心算法分组，不再需要层级
    remaining_students = []
    for layer_key,students in sorted_layers:
        remaining_students.extend(students)
    #AI推荐先按CGPA排序再贪心分组，但我认为没有必要
    #remaining_students.sort(key=lambda x: x.cgpa, reverse=True)
    copied_remaining_students = copy.copy(remaining_students) #防止遍历过程中删除元素导致遍历出错
    for student in copied_remaining_students:
        best_group = None
        lowest_score_increase = 13000721 #计算分数增加的最小值
        best_group_score = None
        full = True #通过布尔值记录是否所有小组已满
        for i in range(1,number_of_groups+1):
            if len(groups[i]) >= group_size:
                continue
            full = False
            #生成临时小组，计算分数增加量
            former_score = groups_score[i]
            temp_group = copy.copy(groups[i])
            temp_group.append(student)
            current_score = calc_score_v1(temp_group,current_tutorialgroup)
            score_increase = current_score - former_score
            if score_increase < lowest_score_increase:
                lowest_score_increase = score_increase
                best_group = i
                best_group_score = current_score
            #print(f"第{i}组分数增加量为{score_increase}")
        #print(f"选中小组{best_group}，分数增加量为{lowest_score_increase}")
        if not full:
            remaining_students.remove(student)
            groups[best_group].append(student)
            groups_score[best_group] = best_group_score
        #print(f"将{(student.school,student.gender)}添加到第{best_group}组")
        #member_num = [len(groups[i]) for i in range(1,11)]
        #print(member_num)
    if full: #如果有剩下的学生
        randomlist = random.sample(range(1,number_of_groups+1),len(remaining_students))
        for i,student in enumerate(remaining_students):
            groups[randomlist[i]].append(student)
    #贪心分组完成
    #坏了，没有用Group类，现在加上
    output_groups = [Group(i) for i in range(1,number_of_groups+1)]
    for i in range(1,number_of_groups+1):
        output_groups[i-1].groupmate = groups[i]
        output_groups[i-1].score = groups_score[i]
        print(f"第{i}小组有{len(groups[i])}人")
    return output_groups

主程序

主程序伪代码:
```
用户输入每小组人数
建立output.csv
写入表头：[大组号,小组号,学号,学院,性别,学分]
以大组号，大组对象 遍历数据总集:
    进行该大组的分组，将分组得到的十个小组对象以列表返还
    以小组 遍历列表:
        以学生个体 遍历小组:
            将学生的个人信息和分组信息写入文件的一行
    输出该大组中每个小组的均衡性分数
保存文件并退出

In [12]:
def main():
    try:
        group_size = 5
        # int(input("Group size: "))
        number_of_groups = 50 // group_size
    except:
        print("Invalid Input")
        return None
    file = open("output.csv","w",newline = '',encoding="utf-8")
    writer = csv.writer(file)
    header = ['Tutorial Group', 'Group', 'Student ID', 'School', 'Name', 'Gender', 'CGPA']
    writer.writerow(header)
    # global dataset
    for tutorialgroup_id, tutorial_group in dataset.items():  # ('G-1',TutorialGroup(1))
        output_groups = annealing(tutorialgroup_id,group_size,number_of_groups)
        for i,group in enumerate(output_groups):
            for student in group.groupmate:
                row = [tutorialgroup_id,i+1,student.student_id,student.school,student.name,student.gender,student.cgpa]
                writer.writerow(row)
        score_list = [calc_score_v1(output_groups[i].groupmate, tutorialgroup_id) for i in range(number_of_groups)]
        print(f"第{tutorialgroup_id}大组，每组分数：{score_list}")
    file.close()
main()

第1小组有5人
第2小组有5人
第3小组有5人
第4小组有5人
第5小组有5人
第6小组有5人
第7小组有5人
第8小组有5人
第9小组有5人
第10小组有5人
第G-1大组，每组分数：[0.4990059171653851, 0.3990850104561643, 0.4073320041516752, 0.5154999045564068, 0.5116189663467546, 0.47515079857472076, 0.3896252235701371, 0.32560299530019887, 0.30110457285176967, 0.39787221726564803]
第1小组有5人
第2小组有5人
第3小组有5人
第4小组有5人
第5小组有5人
第6小组有5人
第7小组有5人
第8小组有5人
第9小组有5人
第10小组有5人
第G-10大组，每组分数：[0.4909692162227969, 0.4974978382292413, 0.5955439154353863, 0.3690650246983175, 0.49545764385222746, 0.3607002277525604, 0.49056117734739413, 0.35253945024450484, 0.4979058771046442, 0.35580376124772706]
第1小组有5人
第2小组有5人
第3小组有5人
第4小组有5人
第5小组有5人
第6小组有5人
第7小组有5人
第8小组有5人
第9小组有5人
第10小组有5人
第G-100大组，每组分数：[0.5594584991063569, 0.6052172157916611, 0.3942962445426012, 0.5004079029790236, 0.48738444674117526, 0.5039893534444317, 0.7106777014161911, 0.6104265982868005, 0.4984543845433464, 0.38680775720583827]
第1小组有5人
第2小组有5人
第3小组有5人
第4小组有5人
第5小组有5人
第6小组有5人
第7小组有5人
第8小组有5人
第9小组有5人
第10小组有5人
第G-101大组，每组分数：[0.4590330