# Post Processing
1. Back up the grading results.
2. Generate the score report.
3. Create an individual scored PDF for each student.
4. Collect samples.
5. Email the individual scored PDFs.

In [None]:
# Path to the exam PDF. Only its base name (without extension) is used below:
# it selects the "../marking_form/<name>" folder that the later cells work in.
pdf_file = "../data/TestScript.pdf"

In [None]:
import os

# Exam name: the PDF file name with its directory and extension removed.
file_name, _extension = os.path.splitext(os.path.basename(pdf_file))

# Input folders of the marking form for this exam.
# Note: all of these except base_path_questions end with a slash, because
# later cells build file paths from them by plain string concatenation.
base_path = "../marking_form/" + file_name
base_path_images = base_path + "/images/"
base_path_annotations = base_path + "/annotations/"
base_path_questions = base_path + "/questions"

# Output folders for the marked material; created up front if missing.
base_path_marked = base_path + "/marked"
base_path_marked_images = base_path_marked + "/images/"
base_path_marked_pdfs = base_path_marked + "/pdf/"
base_path_marked_scripts = base_path_marked + "/scripts/"
for marked_folder in (
    base_path_marked_images,
    base_path_marked_pdfs,
    base_path_marked_scripts,
):
    os.makedirs(marked_folder, exist_ok=True)


### Remove version history
Do this before you back up: it deletes the `control-*` and `mark-*` history files from the questions folder.

In [None]:
import os
# Delete every file under the questions folder whose name starts with one of
# the history prefixes. "mark.json" itself is kept: it does not start with "mark-".
history_prefixes = ("control-", "mark-")
for folder, _subfolders, names in os.walk(base_path_questions):
    stale_names = [name for name in names if name.startswith(history_prefixes)]
    for name in stale_names:
        os.remove(os.path.join(folder, name))

Zip the website.

In [None]:
import shutil
# Zip the content of the marking-form folder into "<base_path>.zip".
shutil.make_archive(base_name=base_path, format="zip", root_dir=base_path)

# Generate Score Report

You must visit the ID and Name pages to verify the values before generating the mark sheet.

In [None]:
import os
import json
import pandas as pd
from termcolor import colored

# Number of pages per script = number of top-level entries in annotations.json.
with open(os.path.join(base_path_annotations, "annotations.json")) as annotationsFile:
    numberOfPage = len(json.load(annotationsFile))

# Map the "id" of every entry in ID/mark.json to the student ID marked on it.
# A non-empty manual override takes precedence over the original mark.
with open(os.path.join(base_path_questions, "ID", "mark.json")) as idMarksFile:
    pageToStudentId = {
        entry["id"]: (
            entry["mark"] if entry["overridedMark"] == "" else entry["overridedMark"]
        )
        for entry in json.load(idMarksFile)
    }


def getStudentId(page):
    """Return the student ID of the script that contains ``page``.

    Walks backwards from ``page`` over at most ``numberOfPage`` pages and
    returns the ID stored for the first page found in ``pageToStudentId``
    (whose keys are looked up as strings). If none of those pages is known,
    a red warning is printed and ``None`` is returned.
    """
    oldestPage = page - numberOfPage
    for candidate in reversed(range(oldestPage + 1, page + 1)):
        key = str(candidate)
        if key in pageToStudentId:
            return pageToStudentId[key]
    print(colored("{} is not in pageToStudentId!".format(page), "red"))
    return None


# Collect the marks of every question: {question: {studentId: mark}}.
# Each folder below base_path_questions that holds a mark.json is one question;
# the question name is the folder path relative to base_path_questions.
questionAndMarks = {}
for path, currentDirectory, files in os.walk(base_path_questions):
    if "mark.json" not in files:
        continue
    question = path[len(base_path_questions) + 1 :]
    # Context manager so the file is closed even if the JSON is malformed.
    with open(os.path.join(path, "mark.json")) as f:
        data = json.load(f)
    marks = {}
    for i in data:
        # The entry id is a page number; resolve it to the owning student.
        studentId = getStudentId(int(i["id"]))
        # A non-empty manual override takes precedence over the original mark.
        marks[studentId] = (
            i["overridedMark"] if i["overridedMark"] != "" else i["mark"]
        )
    questionAndMarks[question] = marks
# Rows are student IDs, columns are questions.
marksDf = pd.DataFrame(questionAndMarks)
# Put the identity columns first, followed by the question columns in sorted order.
identityColumns = ["ID", "NAME", "CLASS"]
questionColumns = [
    col for col in sorted(marksDf.columns) if col not in identityColumns
]
marksDf = marksDf[identityColumns + questionColumns]

# Total mark per student: numeric sum over every non-identity column.
questionMarks = marksDf[questionColumns].apply(pd.to_numeric)
marksDf["Marks"] = questionMarks.sum(axis=1)
print(marksDf)

# Create Scored Scripts

Copy the raw images to the marked folder.

In [None]:
import shutil
import os

# Start from a fresh copy of the raw page images: any previously marked
# images are discarded first, because copytree needs a non-existing target.
marked_images_present = os.path.exists(base_path_marked_images)
if marked_images_present:
    shutil.rmtree(base_path_marked_images)
shutil.copytree(src=base_path_images, dst=base_path_marked_images)

In [None]:
import json
annotations_path = base_path_annotations + "annotations.json"
with open(annotations_path, "r") as annotations_file:
    annotations = json.load(annotations_file)

# Flatten {page: [annotation, ...]} into one list. Each annotation gets its
# page number attached and its "x"/"y" keys renamed to "left"/"top".
annotations_list = []
for page_key, page_annotations in annotations.items():
    for annotation in page_annotations:
        annotation["page"] = int(page_key)
        annotation["left"] = annotation["x"]
        annotation["top"] = annotation["y"]
        del annotation["x"], annotation["y"]
        annotations_list.append(annotation)

# Index the annotations by label; if a label occurs more than once the last one wins.
annotations_dict = {annotation["label"]: annotation for annotation in annotations_list}
annotations_dict


In [None]:
# Map each student ID to the (integer) "id" of its entry in ID/mark.json.
# A non-empty manual override takes precedence over the original mark.
with open(os.path.join(base_path_questions, "ID", "mark.json")) as idMarksFile:
    idEntries = json.load(idMarksFile)

studentIdToPage = {
    (entry["mark"] if entry["overridedMark"] == "" else entry["overridedMark"]): int(
        entry["id"]
    )
    for entry in idEntries
}
studentIdToPage


In [None]:
import cv2
from IPython.display import display
from ipywidgets import IntProgress


# Convert marksDf to a list of per-student dicts (one key per column).
marksDf_list = marksDf.to_dict(orient="records")

# Group the annotation labels by the page they belong to, so that every page
# image is decoded and re-encoded only once per student. Writing the JPEG once
# per annotation would recompress the same image repeatedly and degrade it.
labels_by_page = {}
for label, annotation in annotations_dict.items():
    labels_by_page.setdefault(annotation["page"], []).append(label)

progress_bar = IntProgress(min=0, max=len(marksDf_list))  # instantiate the bar
display(progress_bar)  # display the bar

for student in marksDf_list:
    first_page = studentIdToPage[student["ID"]]
    for page_offset, labels in labels_by_page.items():
        # Annotation pages are offsets from the student's first page.
        page = first_page + page_offset
        image_path = base_path_marked_images + str(page) + ".jpg"
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("Cannot read image: " + image_path)
        for label in labels:
            value = student[label]
            # cv2.putText needs integer pixel coordinates.
            x = int(round(annotations_dict[label]["left"]))
            y = int(round(annotations_dict[label]["top"]))
            cv2.putText(
                img, str(value), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2
            )
        cv2.imwrite(image_path, img)
    progress_bar.value += 1



In [None]:
from PIL import Image

# Build one PDF per student from that student's marked page images.
for student in marksDf_list:
    studentId = student["ID"]
    first_page = studentIdToPage[studentId]
    last_page = first_page + numberOfPage - 1
    print(studentId, first_page, last_page)
    pdf_path = base_path_marked_pdfs + studentId + ".pdf"

    images = []
    try:
        for i in range(first_page, last_page + 1):
            images.append(Image.open(base_path_marked_images + str(i) + ".jpg"))
        # The first image is the document; the rest are appended as further pages.
        images[0].save(pdf_path, save_all=True, append_images=images[1:])
    finally:
        # Image.open keeps the file open; release the handles for every
        # student so a large class does not exhaust file descriptors.
        for image in images:
            image.close()
        

# Generate Script Sample

Five sets of samples are generated:
1. Combined scripts
2. 3 Good, 3 Average, and 3 Weak.
3. 5 Good, 5 Average, and 5 Weak.
4. 3 Good, 3 Average, and 3 Weak above the passing mark.
5. 5 Good, 5 Average, and 5 Weak above the passing mark. 

In [None]:
# Threshold for the "_only_pass" samples: only students whose total "Marks"
# is strictly greater than this value are considered for them.
passingMark = 0

In [None]:
from PyPDF4 import PdfFileMerger

# Merge every student PDF in base_path_marked_pdfs into a single "all.pdf".
writer = PdfFileMerger(strict=True)
try:
    for path, currentDirectory, files in os.walk(base_path_marked_pdfs):
        # os.walk yields names in arbitrary order; sort so that the page
        # order of the combined PDF is the same on every run.
        currentDirectory.sort()
        for file in sorted(files):
            if file.endswith(".pdf"):
                writer.append(os.path.join(path, file))
    writer.write(base_path_marked_scripts + "all.pdf")
finally:
    # Releases the input files that append() opened.
    writer.close()

In [None]:
from PyPDF4 import PdfFileMerger, PdfFileReader

# Total marks of all students, best first; the index identifies the student.
ranked = marksDf.sort_values(by=["Marks"], ascending=False)
sampling = ranked["Marks"]

# Divider pages placed in front of each group of sample scripts.
from_directory = os.path.join(os.getcwd(), "..", "templates", "pdf")
goodPage, averagePage, weakPage = [
    PdfFileReader(from_directory + template)
    for template in ("/Good.pdf", "/Average.pdf", "/Weak.pdf")
]


def get_scripts_psf(df):
    """Return the marked-PDF path for every index label of ``df``.

    Each label is concatenated between ``base_path_marked_pdfs`` and ".pdf",
    so the labels must be strings.
    """
    return [base_path_marked_pdfs + label + ".pdf" for label in df.index]


def take_sample(n, sampling, suffix=""):
    """Write a sample PDF with ``n`` good, ``n`` average and ``n`` weak scripts.

    Parameters
    ----------
    n : int
        Requested number of scripts per group. If ``sampling`` has fewer than
        ``3 * n`` entries, ``n`` is reduced to ``len(sampling) // 3`` so the
        three groups cannot overlap.
    sampling : pandas.Series
        Marks sorted from best to worst; the index labels are used to locate
        each student's PDF via ``get_scripts_psf``.
    suffix : str
        Text appended to the output file name (before ".pdf").

    The result is written to ``base_path_marked_scripts`` as
    ``sampleOf<n><suffix>.pdf``, each group preceded by its divider page.
    """
    if len(sampling) < 3 * n:
        n = len(sampling) // 3
    good = sampling.head(n)
    weak = sampling.tail(n)
    # Take exactly n scripts centred on the median. Slicing
    # [median - n//2 : median + n//2] would return only 2 * (n // 2)
    # scripts, i.e. one too few for odd n.
    median = len(sampling) // 2
    start = median - n // 2
    average = sampling.iloc[start : start + n]

    fileName = base_path_marked_scripts + "sampleOf" + str(n) + suffix + ".pdf"
    merger = PdfFileMerger()
    try:
        for dividerPage, group in (
            (goodPage, good),
            (averagePage, average),
            (weakPage, weak),
        ):
            merger.append(dividerPage)
            for pdf in get_scripts_psf(group):
                merger.append(PdfFileReader(pdf))
        # Close the output file deterministically instead of leaking the handle.
        with open(fileName, "wb") as output:
            merger.write(output)
    finally:
        merger.close()
    print("Output successfully written to " + fileName)


# Samples drawn from all students.
take_sample(3, sampling)
take_sample(5, sampling)

# Samples drawn only from students above the passing mark.
# Boolean indexing actually removes the failing rows; Series.where would keep
# them (with NaN marks), so failing students would still be sampled.
sampling = sampling[sampling > passingMark]
take_sample(3, sampling, "_only_pass")
take_sample(5, sampling, "_only_pass")


In [None]:
# Detailed report: every column of marksDf (identity, each question, total).
detailed_report_path = base_path_marked_scripts + "details_score_report.xlsx"
marksDf.to_excel(detailed_report_path, index=False)

# Summary report: identity columns and the total mark only.
summary_columns = ["ID", "NAME", "CLASS", "Marks"]
summary_report_path = base_path_marked_scripts + "score_report.xlsx"
marksDf[summary_columns].to_excel(summary_report_path, index=False)

In [None]:
from IPython.display import FileLink 

# Zip the content of the scripts folder. The archive base name points one
# level up ("<scripts folder>/../scripts"), so the zip file is created beside
# the folder rather than inside it.
script_zip = base_path_marked_scripts + "../scripts"
archive_path = script_zip + ".zip"
shutil.make_archive(script_zip, "zip", root_dir=base_path_marked_scripts)
# Last expression of the cell: renders a download link for the archive.
FileLink(archive_path)