# Generate the score report
Extract the score from each sheet and map each GitHub repo name to a student ID.

In [None]:
# Folder that holds both workbooks below and the per-student submission
# folders; the generated report and sample documents are written here as well.
base_folder = "data/ITE3101_practical_tests/ite-3101-practical-test-ab-submissions/"
# Review workbook: every sheet is parsed and must have "Directory" and "Score"
# columns. Sheet names double as the file names read from code_folder.
human_score_review = "human_score_review (completed).xlsx"
# Prefix (followed by "-") of each submission directory name; also the name of
# the exported score report.
assignment_name = "ite-3101-practical-test-ab"
# Sub-folder inside each submission directory where the answer files are read.
code_folder = "lab/lab01/"
# Workbook with "Student ID" and "GitHub Name" columns.
student_id_github = "GitHub.xlsx"

In [None]:
%pip install pandas python-docx

In [None]:
import pandas as pd

# Load the review workbook (one sheet per question) and the
# Student ID / GitHub Name mapping workbook.
excel_file = pd.ExcelFile(base_folder + human_score_review)
student_df = pd.read_excel(base_folder + student_id_github)

# Student ID is string
student_df['Student ID'] = student_df['Student ID'].astype(str)

# Sheet name -> dataframe with a 'GitHub Name' column and one score column
# named after the sheet
sheet_dict = {}

directory_prefix = assignment_name + "-"

# Iterate over each sheet in the Excel file
for sheet_name in excel_file.sheet_names:
    # Read the sheet data into a dataframe
    sheet_df = excel_file.parse(sheet_name)
    # Keep the Directory and Score columns. copy() gives an independent frame
    # so the assignment below does not write to a slice of the parsed sheet.
    sheet_df = sheet_df[['Directory', 'Score']].copy()

    # Remove the assignment name from the directory. regex=False treats the
    # prefix as literal text on every pandas version.
    sheet_df['Directory'] = sheet_df['Directory'].str.replace(
        directory_prefix, '', regex=False)

    # Directory becomes the GitHub Name, Score becomes the sheet's own column
    sheet_df = sheet_df.rename(
        columns={'Directory': 'GitHub Name', 'Score': sheet_name})

    # Add the dataframe to the dictionary with the sheet name as the key
    sheet_dict[sheet_name] = sheet_df

# Combine the sheets side by side, aligned on GitHub Name. Indexing every
# sheet by GitHub Name first is what makes concat match students by name;
# without it concat aligns on row position and sheets with a different row
# order or row count would attach scores to the wrong student.
df = pd.concat(
    [sheet_df.set_index('GitHub Name') for sheet_df in sheet_dict.values()],
    axis=1, join='outer')
# Back to a plain 'GitHub Name' column with a fresh 0..n-1 index
df = df.rename_axis('GitHub Name').reset_index()

In [None]:
# Total score per student: sum over every per-sheet score column
df['Score'] = df.drop('GitHub Name', axis=1).sum(axis=1)

# Look up the Student ID by GitHub Name. If the GitHub Name does not exist in
# student_df, Student ID will be NaN. Only the Student ID column is joined so
# that any other column in the mapping workbook cannot end up in the report
# and be mistaken for a question column later on.
student_ids = student_df.set_index('GitHub Name')[['Student ID']]
df = df.join(student_ids, on='GitHub Name')

# Student ID first, Score second, then the remaining columns in their current
# order. Columns are selected by name rather than by position so the layout
# does not depend on where the join happened to append them.
leading = ['Student ID', 'Score']
cols = leading + [name for name in df.columns if name not in leading]
df = df[cols]
df.head()

In [None]:
# Write the merged score report into base_folder, leaving out the row index
report_path = f"{base_folder}{assignment_name}.xlsx"
df.to_excel(report_path, index=False)

# Generate Script Sample
4 sample sets:

Combined scripts
1. 3 Good, 3 Average, and 3 Weak.
2. 5 Good, 5 Average, and 5 Weak.
3. 3 Good, 3 Average, and 3 Weak above the passing mark.
4. 5 Good, 5 Average, and 5 Weak above the passing mark.

In [None]:
# Minimum total Score a student needs to be included in the "_only_pass" samples
passingMark = 30

In [None]:
import os
from docx import Document
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
from docx.shared import Pt

# Leave the teachers' own repositories out of the sampling pool
teacher_accounts = ["cloud-and-data-centre-administration", "it114115-student"]
df = df[~df["GitHub Name"].isin(teacher_accounts)]

# Total scores ordered from highest to lowest; the index labels still
# identify the matching rows of df
ranked = df.sort_values(by=["Score"], ascending=False)
sampling = ranked["Score"]

def add_headline(doc, text):
    """Add a title page to doc: text in bold 40 pt, centred, then a page break.

    Args:
        doc: The python-docx document being built.
        text: Headline to show, e.g. the name of a sample group.
    """
    heading = doc.add_paragraph()
    heading.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

    title_run = heading.add_run(text)
    title_run.font.size = Pt(40)
    title_run.bold = True

    # The headline gets a page of its own
    doc.add_page_break()


def add_group(doc, name, index):
    """Append one sample group to doc: a headline page, then each student's scripts.

    For every label in index the matching row of the global df is looked up.
    A cover page with the student's ID, GitHub name and total score is
    written, followed by one page per question holding the question's score
    and the text of the student's answer file. Answer files are read from
    base_folder/<assignment_name>-<GitHub Name>/<code_folder>/<column name>.

    Args:
        doc: The python-docx document being built.
        name: Headline for the group (passed to add_headline).
        index: Index labels of the rows of df to include.
    """
    add_headline(doc, name)

    # Every column apart from the identity/total columns is named after an
    # answer file. The list is the same for all students, so build it once.
    question_columns = [
        column_name for column_name in df.columns
        if column_name not in ("Student ID", "GitHub Name", "Score")
    ]

    for i in index:
        row = df.loc[i]
        doc.add_paragraph("Student ID: " + str(row["Student ID"]))
        doc.add_paragraph("GitHub Name: " + str(row["GitHub Name"]))
        doc.add_paragraph("Total Score: " + str(row["Score"]))
        doc.add_page_break()

        submission_folder = os.path.join(
            base_folder, assignment_name + "-" + str(row["GitHub Name"]), code_folder)

        for column_name in question_columns:
            code_file_path = os.path.join(submission_folder, column_name)
            try:
                # Explicit UTF-8 keeps the result independent of the platform
                # default encoding; errors="replace" stops a single stray
                # byte in a student file from aborting the whole document.
                with open(code_file_path, "r", encoding="utf-8",
                          errors="replace") as file:
                    text = file.read()
            except FileNotFoundError:
                # A student may not have submitted every file; record that on
                # the page instead of losing the rest of the sample.
                text = "(file not found: " + code_file_path + ")"

            doc.add_paragraph("Question: " + column_name)
            doc.add_paragraph("Score: " + str(row[column_name]))
            # Blank paragraph and a ruled line separate the header from the script
            doc.add_paragraph()
            doc.add_paragraph("____________________________________________________________________________________")
            doc.add_paragraph(text)
            doc.add_page_break()


def take_sample(n, sampling, suffix=""):
    """Write a Word document with n good, n average and n weak scripts.

    Good is the first n entries of sampling, weak the last n, and average a
    window of n entries centred on the middle of sampling. The document is
    saved in base_folder as "sampleOf<n><suffix>.docx", where <n> is the
    group size actually used.

    Args:
        n: Requested number of students per group. When sampling holds fewer
            than 3 * n students it is reduced to len(sampling) // 3 so the
            three groups never overlap.
        sampling: Scores ordered from highest to lowest; its index labels are
            passed to add_group to look the students up.
        suffix: Text appended to the output file name.
    """
    if len(sampling) < 3 * n:
        n = len(sampling) // 3
    good = sampling.head(n)
    weak = sampling.tail(n)

    # Take exactly n rows around the median. Slicing
    # median - n // 2 : median + n // 2 would give only n - 1 rows when n is
    # odd (2 for n = 3, 4 for n = 5), so fix the start and take n from there.
    median = len(sampling) // 2
    start = median - n // 2
    average = sampling.iloc[start:start + n]

    # Create a Document object
    doc = Document()
    add_group(doc, "Good", good.index)
    add_group(doc, "Average", average.index)
    add_group(doc, "Weak", weak.index)
    doc.save(base_folder+"/sampleOf" + str(n) + suffix + ".docx")


# Samples drawn from every remaining student
for group_size in (3, 5):
    take_sample(group_size, sampling)

# Same sample sizes again, restricted to students at or above the passing mark
passed = df[df["Score"] >= passingMark]
sampling = passed.sort_values(by=["Score"], ascending=False)["Score"]
for group_size in (3, 5):
    take_sample(group_size, sampling, "_only_pass")

Zip for backup

In [None]:
import shutil

# Name the archive after the submissions folder itself. normpath drops any
# trailing separator, so basename/dirname give the right answer whether or
# not base_folder ends with "/".
submissions_dir = os.path.normpath(base_folder)
parent_folder = os.path.basename(submissions_dir)

# Create the zip file name
zip_file_name = parent_folder + "(completed).zip"

# Put the archive beside the submissions folder, not inside the folder being zipped
zip_file_name = os.path.join(os.path.dirname(submissions_dir), zip_file_name)

# zip base_folder and save to zip_file_name. make_archive appends ".zip"
# itself, so pass the path without its extension; splitext removes only the
# final extension, leaving any other ".zip" in the path untouched.
shutil.make_archive(os.path.splitext(zip_file_name)[0], 'zip', base_folder)