In [26]:
import os

import pandas as pd

# Directory that holds the raw CSV inputs, relative to this notebook.
relative_path = '../../sourcedata/'

# Input files. An example submission file can be loaded instead of the
# class-1 file: relative_path + 'SubmitRecord-Class1-Example.csv'
title_info_path = relative_path + 'Data_TitleInfo.csv'
submit_record_path = relative_path + 'Data_SubmitRecord/SubmitRecord-Class1.csv'

# Question ("title") metadata; later cells read its title_ID, score,
# knowledge and sub_knowledge columns.
title_info = pd.read_csv(title_info_path)

# Students' answer-submission records; later cells read its student_ID,
# title_ID and score columns.
submit_record = pd.read_csv(submit_record_path)

In [27]:
# Keep only the best-scoring submission for every (student, question) pair.
# Sorting by student_ID and title_ID (ascending) and score (descending) puts
# each pair's highest score first, so keep='first' retains exactly that row.
submit_record = (
    submit_record
    .sort_values(by=['student_ID', 'title_ID', 'score'], ascending=[True, True, False])
    .drop_duplicates(subset=['student_ID', 'title_ID'], keep='first')
)

In [28]:
# Build the student x knowledge-point mastery matrices (values are earned
# score divided by the attainable score of that knowledge point).
students = submit_record['student_ID'].unique()
knowledge_points = title_info['knowledge'].unique()
sub_knowledge_points = title_info['sub_knowledge'].unique()

# Fail with a readable message when a submission refers to a question that has
# no metadata row (the per-row lookup used previously died with a bare IndexError).
missing_mask = ~submit_record['title_ID'].isin(title_info['title_ID'])
if missing_mask.any():
    missing_ids = submit_record.loc[missing_mask, 'title_ID'].unique().tolist()
    raise ValueError(
        f"submit_record contains title_ID values missing from title_info: {missing_ids}"
    )

# Attach every submission to the metadata row(s) of its question in one merge
# instead of filtering title_info once per submission.
# NOTE(review): if a question is listed on several title_info rows (e.g. under
# more than one sub-knowledge point -- confirm against the data), its score is
# credited once per row. That matches the denominators below, which also sum
# the full score once per title_info row. With unique title_IDs the result is
# the same as crediting a single row.
scored_submissions = submit_record[['student_ID', 'title_ID', 'score']].merge(
    title_info[['title_ID', 'knowledge', 'sub_knowledge']],
    on='title_ID',
    how='inner',
)


def _earned_score_matrix(scored, level, row_labels, column_labels):
    """Sum `score` per (student_ID, `level`) and return it as a float matrix.

    Rows follow `row_labels`, columns follow `column_labels`; combinations
    without any submission are filled with 0.
    """
    return (
        scored.groupby(['student_ID', level])['score']
        .sum()
        .unstack(fill_value=0)
        .reindex(index=row_labels, columns=column_labels, fill_value=0)
        # Drop the axis names so the CSV header written later stays unnamed.
        .rename_axis(index=None, columns=None)
        # Float from the start: scores need not be integers.
        .astype(float)
    )


subknowledge_earned = _earned_score_matrix(
    scored_submissions, 'sub_knowledge', students, sub_knowledge_points
)
knowledge_earned = _earned_score_matrix(
    scored_submissions, 'knowledge', students, knowledge_points
)

# Normalize: divide each column by the total full score of its knowledge point.
total_full_subscores = title_info.groupby('sub_knowledge')['score'].sum()
total_full_scores = title_info.groupby('knowledge')['score'].sum()
# axis=1 aligns the divisor with the column labels.
student_subknowledge_score = subknowledge_earned.div(total_full_subscores, axis=1)
student_knowledge_score = knowledge_earned.div(total_full_scores, axis=1)


In [29]:
# Save student_subknowledge_score and student_knowledge_score as CSV files.
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output directory exists first (needed on a fresh checkout).
os.makedirs('./evaluation', exist_ok=True)
student_subknowledge_score.to_csv('./evaluation/student_subknowledge_score.csv')
student_knowledge_score.to_csv('./evaluation/student_knowledge_score.csv')

In [25]:
# Mastery threshold: a normalized score below this value marks a knowledge
# point as weak for that student.
threshold = 0.6
print(student_subknowledge_score.shape, student_subknowledge_score.columns.values)

# Boolean masks, True where the student's mastery is below the threshold.
# A vectorized comparison replaces the deprecated DataFrame.applymap; NaN
# scores compare as False, exactly as they did with the element-wise lambda.
weak_subknowledge_points = student_subknowledge_score < threshold
weak_knowledge_points = student_knowledge_score < threshold


def _weak_points_by_student(weak_flags):
    """Map each row label of `weak_flags` to the list of columns flagged True.

    `weak_flags` is a boolean DataFrame (students x knowledge points).
    """
    return {
        student: weak_flags.columns[flags.to_numpy(dtype=bool)].tolist()
        for student, flags in weak_flags.iterrows()
    }


# Weak points per student at both levels; look up a student's list with
# e.g. weak_subknowledge_by_student[student_id].
weak_subknowledge_by_student = _weak_points_by_student(weak_subknowledge_points)
weak_knowledge_by_student = _weak_points_by_student(weak_knowledge_points)


(14, 15) ['b3C9s_j0v1yls8' 'b3C9s_l4z6od7y' 'g7R2j_e0v1yls8' 'g7R2j_j1g8gd3v'
 'k4W1c_h5r6nux7' 'm3D1v_r1d7fr3l' 'm3D1v_t0v5ts9h' 'm3D1v_v3d9is1x'
 'r8S3g_l0p5viby' 'r8S3g_n0m9rsw4' 's8Y2f_v4x8by9j' 't5V9e_e1k6cixp'
 'y9W5d_c0w4mj5h' 'y9W5d_e2j7p95s' 'y9W5d_p8g6dgtv']


  weak_subknowledge_points = student_subknowledge_score.applymap(lambda x: x < threshold)
  weak_knowledge_points = student_knowledge_score.applymap(lambda x: x < threshold)
