# Post Processing
1. Backup grading result.
2. Generate score report.
3. Create individual scored pdf.
4. Collect samples.

In [1]:
# Path to the scanned script PDF being processed. Its base name (without
# extension) selects the "../marking_form/<name>" folder used by later cells.
pdf_file = "../data/TestScript.pdf"

In [2]:
import os

# Base name of the scanned script, without directory or extension.
file_name = os.path.splitext(os.path.basename(pdf_file))[0]

# Input locations produced by the marking form. The trailing "/" is kept where
# other cells append a file name by plain string concatenation.
base_path = f"../marking_form/{file_name}"
base_path_images = f"{base_path}/images/"
base_path_annotations = f"{base_path}/annotations/"
# No trailing "/" here: the report cell slices this prefix (plus one separator)
# off each walked folder to obtain the question label.
base_path_questions = f"{base_path}/questions"

# Output locations for the marked images, per-student PDFs and sample scripts.
base_path_marked_images = f"{base_path}/marked/images/"
base_path_marked_pdfs = f"{base_path}/marked/pdf/"
base_path_marked_scripts = f"{base_path}/marked/scripts/"

for marked_dir in (
    base_path_marked_images,
    base_path_marked_pdfs,
    base_path_marked_scripts,
):
    os.makedirs(marked_dir, exist_ok=True)

## Backup grading result
Remove the version history before you back up.

In [3]:
import os
# Delete every file under the questions folder whose name starts with
# "control-" or "mark-". "mark.json" is untouched because the prefix includes
# the hyphen.
history_prefixes = ("control-", "mark-")
for folder, _subfolders, names in os.walk(base_path_questions):
    for name in names:
        if name.startswith(history_prefixes):
            os.remove(os.path.join(folder, name))

Zip the website.

In [4]:
import shutil
# Archive the whole marking-form folder next to itself as "<base_path>.zip".
# The returned archive path is the cell's displayed value.
shutil.make_archive(base_name=base_path, format="zip", root_dir=base_path)

'/workspaces/ai-scoring-handwriting-assignment/marking_form/TestScript.zip'

# Generate Score Report

Check the ID and NAME pages to verify the values before generating the mark sheet.

In [5]:
import os
import json
import pandas as pd
from termcolor import colored

# Maps the page id of each ID answer (a string, as stored in mark.json) to the
# student ID recognised on that page.
pageToStudentId = {}

# annotations.json is a dict; its number of entries is used as the number of
# pages per script.
with open(os.path.join(base_path_annotations, "annotations.json")) as annotations_file:
    numberOfPage = len(json.load(annotations_file))

with open(os.path.join(base_path_questions, "ID", "mark.json")) as id_marks_file:
    for entry in json.load(id_marks_file):
        # A non-empty manual override wins over the recognised value.
        if entry["overridedMark"] != "":
            recognised_id = entry["overridedMark"]
        else:
            recognised_id = entry["mark"]
        pageToStudentId[entry["id"]] = recognised_id


def getStudentId(page):
    """Return the student ID that owns ``page``.

    Starting at ``page`` and walking backwards over at most ``numberOfPage``
    pages, the first page that has an entry in ``pageToStudentId`` decides the
    owner. When no such page exists, a red warning is printed and ``None`` is
    returned.
    """
    candidate = page
    lowest_excluded = page - numberOfPage
    while candidate > lowest_excluded:
        key = str(candidate)  # pageToStudentId is keyed by page id strings
        if key in pageToStudentId:
            return pageToStudentId[key]
        candidate -= 1
    print(colored("{} is not in pageToStudentId!".format(page), "red"))
    return None


# Build {question label: {student ID: final mark}} from every mark.json found
# under the questions folder.
questionAndMarks = {}
for path, currentDirectory, files in os.walk(base_path_questions):
    if "mark.json" not in files:
        continue
    # The folder path relative to the questions root is the question label.
    question = path[len(base_path_questions) + 1 :]
    # The context manager closes the file even when parsing fails.
    with open(os.path.join(path, "mark.json")) as marks_file:
        entries = json.load(marks_file)
    marks = {}
    for entry in entries:
        studentId = getStudentId(int(entry["id"]))
        # A non-empty manual override wins over the recognised mark.
        marks[studentId] = (
            entry["overridedMark"] if entry["overridedMark"] != "" else entry["mark"]
        )
    questionAndMarks[question] = marks

# One row per student ID, one column per question label.
marksDf = pd.DataFrame(questionAndMarks)

# Identity columns first, then the question columns in sorted order.
identity_columns = ["ID", "NAME", "CLASS"]
question_columns = [
    col for col in sorted(marksDf.columns) if col not in identity_columns
]
marksDf = marksDf[identity_columns + question_columns]

# Total mark per student: every question column converted to numbers and summed.
marksDf["Marks"] = marksDf[question_columns].apply(pd.to_numeric).sum(axis=1)
print(marksDf)

                  ID                                NAME CLASS 1a 1b 1c 1d 1e  \
230343435  230343435                          LAUYukTing    IA  2  0  2  2  2   
230490969  230490969                         C9LAMHokMan    1D  2  0  2  2  2   
230037561  230037561                           YUNGLitYu    1A  2  0  2  2  2   
230000997  230000997                     CHANChun   Hang    1A  2  0  2  2  2   
230241732  230241732   WciOauolonILY1 '09*4LEUNG WingHim    1C  2  2  2  0  2   
...              ...                                 ...   ... .. .. .. .. ..   
230034663  230034663       il Ouuulioio |TUyiaWONG TszNi    1B  0  0  2  2  2   
230341478  230341478  Til Ouucliuii lu nuglaLEUNG TungWa    1A  2  0  2  2  2   
230257268  230257268           Uci L44encIo189*4LAUKaHim    1C  2  0  2  0  2   
230151517  230151517          Unei sJenIoTo9*4LAMKwanYin    1B  2  2  2  2  2   
230108158  230108158        aeici OacuodTo9ieCHANHinTing    1B  2  2  0  0  2   

          1f 1g  ... 4g 4h 

# Create Scored Scripts

Copy raw images to marked folder

In [6]:
import shutil
import os

# Start from a clean copy of the raw page images: drop any previous marked
# copy, then duplicate the raw folder. The destination path returned by
# copytree is the cell's displayed value.
marked_images_exist = os.path.exists(base_path_marked_images)
if marked_images_exist:
    shutil.rmtree(base_path_marked_images)
shutil.copytree(src=base_path_images, dst=base_path_marked_images)

'../marking_form/TestScript/marked/images/'

In [7]:
import json
annotations_path = base_path_annotations + "annotations.json"
with open(annotations_path, "r") as annotations_file:
    annotations = json.load(annotations_file)

# Flatten {page: [box, ...]} into one list. Each box is updated in place: it
# records its page number and its "x"/"y" keys are renamed to "left"/"top".
annotations_list = []
for page, boxes in annotations.items():
    for annotation in boxes:
        annotation["page"] = int(page)
        annotation["left"] = annotation.pop("x")
        annotation["top"] = annotation.pop("y")
    annotations_list.extend(boxes)

# Index the boxes by label; a later box with the same label replaces an earlier one.
annotations_dict = {annotation["label"]: annotation for annotation in annotations_list}
annotations_dict


{'1a': {'width': 209,
  'height': 91,
  'label': '1a',
  'page': 0,
  'left': 211,
  'top': 400},
 '1b': {'width': 210,
  'height': 75,
  'label': '1b',
  'page': 0,
  'left': 488,
  'top': 409},
 '1c': {'width': 198,
  'height': 79,
  'label': '1c',
  'page': 0,
  'left': 772,
  'top': 405},
 '1d': {'width': 196,
  'height': 75,
  'label': '1d',
  'page': 0,
  'left': 1056,
  'top': 408},
 '1e': {'width': 205,
  'height': 76,
  'label': '1e',
  'page': 0,
  'left': 1325,
  'top': 405},
 '1f': {'width': 197,
  'height': 68,
  'label': '1f',
  'page': 0,
  'left': 212,
  'top': 491},
 '1g': {'width': 207,
  'height': 72,
  'label': '1g',
  'page': 0,
  'left': 492,
  'top': 490},
 'NAME': {'width': 339,
  'height': 64,
  'label': 'NAME',
  'page': 0,
  'left': 235,
  'top': 214},
 'ID': {'width': 234,
  'height': 60,
  'label': 'ID',
  'page': 0,
  'left': 294,
  'top': 275},
 'CLASS': {'width': 144,
  'height': 63,
  'label': 'CLASS',
  'page': 0,
  'left': 789,
  'top': 282},
 '1h': {

In [8]:
# Inverse lookup of the ID marks: student ID -> page number (int) of the page
# carrying that student's ID answer. A non-empty override wins over the
# recognised value.
with open(os.path.join(base_path_questions, "ID", "mark.json")) as id_marks_file:
    id_entries = json.load(id_marks_file)

studentIdToPage = {
    (entry["overridedMark"] if entry["overridedMark"] != "" else entry["mark"]): int(
        entry["id"]
    )
    for entry in id_entries
}
studentIdToPage

{'230098093': 0,
 '230343435': 10,
 '230490969': 100,
 '230066821': 102,
 '230077675': 104,
 '230070545': 106,
 '230037561': 108,
 '230040319': 110,
 '230235541': 112,
 '230235516': 114,
 '230034000': 116,
 '230318186': 118,
 '230343367': 12,
 '230093940': 120,
 '230104504': 122,
 '230085651': 124,
 '230353846': 126,
 '230000997': 128,
 '230196873': 130,
 '230309903': 132,
 '230155485': 134,
 '230172003': 136,
 '230083946': 138,
 '230351785': 14,
 '230059448': 140,
 '230090833': 142,
 '230029966': 144,
 '230494004': 146,
 '230042860': 148,
 '230242175': 150,
 '230075983': 152,
 '230046521': 154,
 '230044620': 156,
 '230096825': 158,
 '230279433': 16,
 '230241732': 160,
 '230283090': 162,
 '230172377': 164,
 '230345729': 166,
 '230033802': 168,
 '230241744': 170,
 '230504303': 172,
 '230049222': 174,
 '230065744': 176,
 '230189974': 178,
 '230166592': 18,
 '230494305': 180,
 '230084408': 182,
 '230331203': 184,
 '230126686': 186,
 '230021864': 188,
 '230030824': 190,
 '230247539': 192,


In [12]:
import cv2
from IPython.display import display
from ipywidgets import IntProgress


# Convert marksDf to a list of per-student dicts (one record per row).
marksDf_list = marksDf.to_dict(orient="records")

# Group the annotation labels by page offset so that each page image is
# decoded and re-encoded once per student instead of once per annotation.
# That is much faster and avoids stacking lossy JPEG recompression.
labels_by_page = {}
for label, box in annotations_dict.items():
    labels_by_page.setdefault(box["page"], []).append(label)

f = IntProgress(min=0, max=len(marksDf_list))  # instantiate the bar
display(f)  # display the bar

# NOTE: this cell draws onto the files in base_path_marked_images. Re-running
# it without first re-copying the raw images writes the text a second time.
for student in marksDf_list:
    first_page = studentIdToPage[student["ID"]]
    for page_offset, labels in labels_by_page.items():
        page = first_page + page_offset
        image_path = base_path_marked_images + str(page) + ".jpg"
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals an unreadable or missing file by returning None.
            raise FileNotFoundError("Cannot read page image: " + image_path)
        for annotation in labels:
            value = str(student[annotation])
            if annotation == "ID":
                # The ID box also carries the student's total.
                value = value + " Marks: " + str(student["Marks"])
            x = annotations_dict[annotation]["left"]
            y = annotations_dict[annotation]["top"]
            textSize = cv2.getTextSize(
                text=value, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, thickness=2
            )
            # putText anchors at the text baseline, so shift down by the text
            # height to keep the text below the box's top edge.
            height = textSize[0][1]
            cv2.putText(
                img, value, (x, y + height), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2
            )
        cv2.imwrite(image_path, img)
    f.value += 1

IntProgress(value=0, max=105)

KeyboardInterrupt: 

In [10]:
from PIL import Image

# Write one PDF per student containing that student's marked page images.
for student in marksDf_list:
    studentId = student["ID"]
    first_page = studentIdToPage[studentId]
    last_page = first_page + numberOfPage - 1
    print(studentId, first_page, last_page)
    pdf_path = base_path_marked_pdfs + studentId + ".pdf"

    images = []
    try:
        for page in range(first_page, last_page + 1):
            images.append(Image.open(base_path_marked_images + str(page) + ".jpg"))
        # The first image is the document; the remaining ones are appended as pages.
        images[0].save(pdf_path, save_all=True, append_images=images[1:])
    finally:
        # Image.open keeps the underlying file open; release every handle so
        # the loop does not accumulate open files across all students.
        for image in images:
            image.close()
        

230343435 10 11
230490969 100 101
230037561 108 109
230000997 128 129
230241732 160 161
230329096 28 29
230180642 30 31
230033992 34 35
230054139 4 5
230056495 70 71
230054582 32 33
230241744 170 171
230193416 94 95
230029966 144 145
230229153 22 23
230109155 56 57
230098093 0 1
230046521 154 155
230039972 208 209
230081209 26 27
230074777 46 47
230137314 62 63
230166592 18 19
230090894 198 199
230036515 200 201
230035008 58 59
230172076 96 97
230262137 36 37
230172003 136 137
230331203 184 185
230110191 44 45
230087933 50 51
230235541 112 113
230494305 180 181
230168063 20 21
230353858 202 203
230128250 52 53
230037192 64 65
230040319 110 111
230309903 132 133
230106221 68 69
230045874 92 93
230085651 124 125
230083946 138 139
230172377 164 165
230047321 60 61
230066821 102 103
230104504 122 123
230196873 130 131
230247539 192 193
230042915 24 25
230343367 12 13
230042860 148 149
230049222 174 175
230125155 194 195
230193557 196 197
230036889 2 3
230060712 40 41
230345570 98 99
230044

# Generate Script Sample

Five sets of samples:
1. Combined scripts
2. 3 Good, 3 Average, and 3 Weak.
3. 5 Good, 5 Average, and 5 Weak.
4. 3 Good, 3 Average, and 3 Weak above the passing mark.
5. 5 Good, 5 Average, and 5 Weak above the passing mark. 

In [11]:
# Threshold for the "_only_pass" samples: only total marks strictly greater
# than this value are considered.
passingMark = 15

In [12]:
from PyPDF4 import PdfFileMerger

# Merge every per-student PDF into a single "all.pdf" in the scripts folder.
writer = PdfFileMerger(strict=True)
try:
    for path, currentDirectory, files in os.walk(base_path_marked_pdfs):
        # os.walk gives no ordering guarantee; sort folders and files so the
        # combined document has the same page order on every run.
        currentDirectory.sort()
        for file in sorted(files):
            if file.endswith(".pdf"):
                pdf_path = os.path.join(path, file)
                writer.append(pdf_path)
    writer.write(base_path_marked_scripts + "all.pdf")
finally:
    # Release the merger's resources, as take_sample does for its merger.
    writer.close()

In [13]:
from PyPDF4 import PdfFileMerger, PdfFileReader

# The "Marks" column ordered from highest to lowest. The index is carried over
# from marksDf (presumably student IDs - it is later used to build PDF file names).
sampling = marksDf.sort_values(by=["Marks"], ascending=False)["Marks"]

# Folder holding the cover-page templates, resolved from the current working directory.
from_directory = os.path.join(os.getcwd(), "..", "templates", "pdf")

# Cover pages that take_sample inserts in front of each group of scripts.
goodPage = PdfFileReader(from_directory + "/Good.pdf")
averagePage = PdfFileReader(from_directory + "/Average.pdf")
weakPage = PdfFileReader(from_directory + "/Weak.pdf")


def get_scripts_psf(df):
    """Return the marked-PDF path for every index label of ``df``.

    Each path is ``base_path_marked_pdfs + label + ".pdf"``, in index order.
    """
    return [base_path_marked_pdfs + label + ".pdf" for label in df.index]


def take_sample(n, sampling, suffix=""):
    """Write a sample PDF with ``n`` good, ``n`` average and ``n`` weak scripts.

    Parameters
    ----------
    n : int
        Requested number of scripts per group. When ``sampling`` holds fewer
        than ``3 * n`` entries, ``n`` is reduced to a third of its length.
    sampling : pandas.Series
        Marks sorted from best to worst; the index labels are turned into
        script PDF paths by ``get_scripts_psf``.
    suffix : str
        Text appended to the output file name before ".pdf".

    The output is written to
    ``base_path_marked_scripts + "sampleOf" + str(n) + suffix + ".pdf"``, where
    ``n`` is the (possibly reduced) group size. Each group is preceded by its
    cover page (Good, Average, Weak).
    """
    total = len(sampling)
    if total < 3 * n:
        n = total // 3

    good = sampling.head(n)
    weak = sampling.tail(n)
    # Take exactly n scripts centred on the median. The previous window
    # (median - n/2 : median + n/2) lost one script whenever n was odd.
    start = max(total // 2 - n // 2, 0)
    average = sampling.iloc[start : start + n]

    merger = PdfFileMerger()
    try:
        for cover, group in ((goodPage, good), (averagePage, average), (weakPage, weak)):
            merger.append(cover)
            for pdf in get_scripts_psf(group):
                merger.append(PdfFileReader(pdf))
        fileName = base_path_marked_scripts + "sampleOf" + str(n) + suffix + ".pdf"
        # Close the output file deterministically instead of leaking the handle.
        with open(fileName, "wb") as output:
            merger.write(output)
        print("Output successfully written to " + fileName)
    finally:
        merger.close()


# Samples drawn from all students.
take_sample(3, sampling)
take_sample(5, sampling)

# Samples drawn only from students strictly above the passing mark.
# Series.where would keep every row and merely replace failing marks with NaN,
# leaving failing students in the population (and in the "weak" tail).
# Boolean indexing actually removes them.
sampling = sampling[sampling > passingMark]
take_sample(3, sampling, "_only_pass")
take_sample(5, sampling, "_only_pass")

Output successfully written to../marking_form/TestScript/marked/scripts/sampleOf3.pdf
Output successfully written to../marking_form/TestScript/marked/scripts/sampleOf5.pdf
Output successfully written to../marking_form/TestScript/marked/scripts/sampleOf3_only_pass.pdf
Output successfully written to../marking_form/TestScript/marked/scripts/sampleOf5_only_pass.pdf


In [14]:
# Detailed report: every column of marksDf, without the index column.
detail_report_path = base_path_marked_scripts + "details_score_report.xlsx"
marksDf.to_excel(detail_report_path, index=False)

# Summary report: identity columns and the total mark only.
summary_columns = ["ID", "NAME", "CLASS", "Marks"]
summary_report_path = base_path_marked_scripts + "score_report.xlsx"
marksDf[summary_columns].to_excel(summary_report_path, index=False)

In [15]:
from IPython.display import FileLink 

# Zip the scripts folder into a sibling "scripts.zip" (the "../" steps out of
# the folder being archived) and show a download link to it.
script_zip = base_path_marked_scripts + "../scripts"
shutil.make_archive(
    base_name=script_zip,
    format="zip",
    root_dir=base_path_marked_scripts,
)
FileLink(script_zip + ".zip")