In [4]:
# 파이썬 프로그램 표절검사
# Copyrighted by Young Pyo Jun, ypjun@yonsei.ac.kr
# V1.0 (2022.05.07)

#-*-encoding:utf-8-*-
import pandas as pd
import os
import tkinter as tk
from tkinter import filedialog
import subprocess as sp
import sys


from colorama import init, Fore, Back, Style

# 채점 프로그램이 있는 폴더를 선택
def SelectFile():
    """Ask the user to pick a folder and return its path.

    Creates a Tk root window, shows a directory chooser that starts in the
    current directory, and destroys the window again afterwards.

    Returns:
        The selected directory path, or "" when nothing was selected.
    """
    window = tk.Tk()
    try:
        window.tkraise()
        chosen = filedialog.askdirectory(initialdir=".", title="Select")
    finally:
        # Always tear the root window down, even if the dialog raises.
        window.destroy()
    # Any empty/None result (cancelled dialog) is normalised to "".
    return chosen if chosen else ""

def GetPNum(pName):
    """Extract the problem number from a submission file name.

    The number is the text between the first '(' and the first ')' of the
    name, e.g. ``"answer(3).py"`` -> 3.

    Args:
        pName: File name (not a path) to inspect.

    Returns:
        The problem number as an int, or 0 when the name does not end in
        ``.py``, has no parenthesised part, or that part is not a plain
        decimal number.
    """
    if pName.split('.')[-1] != 'py':
        return 0
    nPS = pName.find('(')
    nPE = pName.find(')')
    if nPS == -1 or nPE == -1:
        return 0
    st = pName[nPS + 1:nPE]
    # isdecimal() rather than isdigit(): isdigit() also accepts characters
    # such as '²' that int() cannot parse, which would raise ValueError.
    return int(st) if st.isdecimal() else 0

# 주석 제거
def deComment(pgm):
    """Return *pgm* with everything from each '#' to the end of its line removed.

    Newlines are kept, so the line count is unchanged. Every '#' is treated
    as the start of a comment, including one that appears inside a string
    literal.
    """
    kept_parts = []
    for line in pgm.split('\n'):
        # partition() yields the text before the first '#' (or the whole
        # line when there is none).
        code_part, _, _ = line.partition('#')
        kept_parts.append(code_part)
    return '\n'.join(kept_parts)

def SimRate(isrc, idst, unit="byte"):
    """Rate how much of program *isrc* is also found in program *idst*.

    Both programs are stripped of comments with ``deComment`` and then
    broken into units:

    * ``"word"``  - '.' removed, then split on whitespace.
    * ``"byte"`` / ``"user"`` - every character, whitespace included.
    * anything else - '.' and all whitespace removed, then every character.

    The score is the percentage of source units that can be matched
    one-to-one with a destination unit, minus the percentage of destination
    units left unmatched, floored at 0. Order of the units is ignored.

    Args:
        isrc: Source program text.
        idst: Program text to compare against.
        unit: Comparison granularity, see above.

    Returns:
        100 when the source is empty (also after normalisation), 0 when the
        destination is empty, otherwise a float in the range 0..100.
    """
    from collections import Counter

    if not isrc: return 100
    if not idst: return 0
    src = deComment(isrc)
    dst = deComment(idst)
    if unit == "word":
        src = src.replace('.', '').split()
        dst = dst.replace('.', '').split()
    elif unit == "byte" or unit == "user":
        src = list(src)
        dst = list(dst)
    else:  # unit == "char" or unit == "serial"
        src = list(''.join(src.replace('.', '').split()))
        dst = list(''.join(dst.replace('.', '').split()))
    srcLen = len(src)
    dstLen = len(dst)
    # A program made only of comments/whitespace is empty at this point;
    # apply the same convention as for empty input instead of dividing by 0.
    if srcLen == 0: return 100
    if dstLen == 0: return 0
    # Multiset intersection: each source unit may consume at most one
    # matching destination unit.
    sim = sum((Counter(src) & Counter(dst)).values())
    unmatched = dstLen - sim
    return max(0, (sim * 100 / srcLen) - (unmatched * 100 / dstLen))

def CheckPlagi(ansPgm, solPgm, stFolders, stf, ansTime):
    """Find the program most similar to a student's submission.

    The submission is first compared with the reference solution and then
    with every file of every other student folder whose modification time
    is not later than the submission's. Reads the module-level ``stdDir``
    as the parent directory of the student folders.

    Args:
        ansPgm: Text of the submission being checked.
        solPgm: Text of the reference solution.
        stFolders: Names of all student folders; each is split on '-' into
            a name and an id (the id is cut at the first '_').
        stf: Folder name of the student being checked (skipped).
        ansTime: Modification time (``st_mtime``) of the submission.

    Returns:
        Tuple ``(rate, program, info)``: the highest similarity rate found,
        the text of the matching program, and either "정답해설" (reference
        solution) or "name(id)" of the matching student.

    Raises:
        ValueError: If a folder name does not contain exactly one '-'.
    """
    plgRate = SimRate(ansPgm, solPgm)
    plgPgm = solPgm
    plgInfo = "정답해설"
    for pstf in stFolders:
        if pstf == stf:
            continue
        pstName, pstId = pstf.split('-')
        pstId = pstId.split('_')[0]
        pstDir = stdDir + '/' + pstf
        for pyF in os.listdir(pstDir):
            dstFile = pstDir + '/' + pyF
            # Sub-directories cannot be opened as text; skip them.
            if not os.path.isfile(dstFile):
                continue
            # Only files saved no later than the submission can be its source.
            if os.stat(dstFile).st_mtime > ansTime:
                continue
            with open(dstFile, "rt", encoding='utf-8') as fIn:
                pyPgm = fIn.read()
            psRate = SimRate(ansPgm, pyPgm)
            if psRate > plgRate:
                plgRate = psRate
                plgPgm = pyPgm
                plgInfo = "{}({})".format(pstName, pstId)
    return plgRate, plgPgm, plgInfo

#################################################################################################
# Interactive driver: pick a class folder, compare every student submission
# with the reference solution and with the other students' files, show each
# suspicious pair to the operator, and save the confirmed cases to a CSV file.
HomeDir = SelectFile()    # select the class (section) folder
exceptNums = []           # problem numbers excluded from checking
stdRate = 95              # similarity (%) above which a pair is shown
PLAGIFILE = "plagiCheck.csv"
MIN_PLAGI = 300           # submissions shorter than this many characters are skipped
PlagiCols = ('stid', 'pnum', 'name', 'program', 'who')
cSTID, cPNUM, cNAME, cPROGRAM, cWHO = PlagiCols
plagiFr = pd.DataFrame(columns=PlagiCols)

if not HomeDir:
    # Nothing was selected, so there is no scoring table or student folder
    # to process and no result file is written.
    stFolders = []
    print("Exit...")
else:
    class_name = HomeDir.split('/')[-1]
    os.chdir(HomeDir)
    stdDir = HomeDir + '/응시자'

    scrFile = "ScoringTable.xlsx"
    scrFrame = pd.read_excel(scrFile, dtype=str)
    maxpNum = len(scrFrame)
    # Only directories can be student folders; ignore stray files.
    stFolders = [f for f in os.listdir(stdDir) if os.path.isdir(stdDir + '/' + f)]
    exceptNums = [int(n) for n in input("제외 문항: ").split()]
    plgIn = input(f"표절율 ({stdRate}) : ")
    if plgIn.isdecimal():
        stdRate = int(plgIn)

    print(f"Starting to check ({class_name}) ======================================\n")
    plagiRows = []        # confirmed cases, one dict per row
    bStop = False
    for stf in stFolders:               # one student per folder
        if bStop: break
        stName, stId = stf.split('-')
        stId = stId.split('_')[0]
        stDir = stdDir + '/' + stf
        pyFiles = os.listdir(stDir)
        pNums = []
        for pyF in pyFiles:
            pNum = GetPNum(pyF)
            if pNum in exceptNums: continue
            # An unparsable, out-of-range or duplicated problem number ends
            # the checking of this student's folder.
            if pNum < 1 or pNum > maxpNum or pNum in pNums:
                print("Incorrect in {} - {}".format(stName, stf))
                print("Files: {}\n".format(pyFiles))
                break
            pNums.append(pNum)
            numFrame = scrFrame[scrFrame.문제번호 == str(pNum)]
            if len(numFrame) == 0:
                print("No such number({})".format(pNum))
                continue
            solPgm = numFrame.정답프로그램.values[0]
            if pd.isna(solPgm):
                # Empty spreadsheet cell: treat as "no reference solution".
                solPgm = ""
            ansFile = stDir + '/' + pyF
            with open(ansFile, "rt", encoding='utf-8') as fIn:
                ansPgm = fIn.read()
            ansTime = os.stat(ansFile).st_mtime
            if len(ansPgm) < MIN_PLAGI: continue
            plgRate, plgPgm, plgInfo = CheckPlagi(ansPgm, solPgm, stFolders, stf, ansTime)
            if plgRate > stdRate:
                print("###############################################################################################")
                print("#### 표절자: {}({})".format(stName, stId))
                print("#### {}번문제 표절률: {:.0f}%\n#### 대상: {}".format(pNum, plgRate, plgInfo))
                print("#### 제출 프로그램\n" + 80 * "~")
                print(ansPgm)
                print(80*"^")
                print("#### 표절 대상 프로그램\n" + 80 * "~")
                print(plgPgm)
                print(80*"^")
                # Empty answer confirms the case, "q" quits, anything else rejects it.
                inp = input("표절 확정? ")
                if inp == "q": bStop = True; break
                elif inp == "":
                    plagiRows.append({cSTID:stId, cPNUM:pNum, cNAME:stName, cPROGRAM:plgPgm, cWHO:plgInfo})
    print(f"\n################# check finished ({class_name}) ##############################")
    # Build the frame in one go; DataFrame.append no longer exists in pandas 2.x.
    plagiFr = pd.DataFrame(plagiRows, columns=PlagiCols)
    plagiFr.to_csv(PLAGIFILE, index=False, encoding="euc-kr")

제외 문항: 
표절율 (95) : 

###############################################################################################
#### 표절자: 박민주(2021240085)
#### 2번문제 표절률: 99%
#### 대상: 박나현(2021240456)
#### 제출 프로그램
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

name = input("이름:")
card = input("주민등록번호:").split("-")


sex = card[1]

year =  int(card[0][0:2])
month =int( card[0][2:4])
day = int(card[0][4:6])
c_year = 2022
c_month = 6
c_day = 1

if sex[:1] =="1":
    if month < 6:
        age = 2022 - (year+1900)
    elif month == 6:
        if day <1:
            age = 2022 - (year+1900)
        else:
            age = 2022- (year+1900) -1
    else:
        age = 2022 - (year+1900) - 1

    print(f"{name}:{age}세 남성")
    
elif sex[:1] =="2":
    if month < 6:
        age = 2022 - (year+1900)
    elif month == 6:
        if day <1:
            age = 2022 - (year+1900)
        else:
            age = 2022- (year+1900) -1
    else:
        age = 2022 - (year+1900) - 1

 

표절 확정? 
###############################################################################################
#### 표절자: 설태겸(2021240058)
#### 2번문제 표절률: 100%
#### 대상: 박성민(2021240767)
#### 제출 프로그램
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
name= input("이름: ")
num= input("주민등록번호: ")

birth, back= num.split("-")

yy= int(birth[0:2])
mm= int(birth[2:4])
dd= int(birth[4:6])

xy= int(back[0])

if xy == 1:
    age = 2022 - 1900 - yy
    if mm >= 6:
        age = age - 1
    else:
        age = age - 2
elif xy == 2:
    age = 2022 - 1900 - yy
    if mm >= 6:
        age = age - 1
    else:
        age = age - 2
elif xy == 3:
    age = 2022 - 2000 - yy
    if age == 0:
        age = 0
    else:
        if mm >= 6:
            if 1 - dd < 0:
                age = age - 1
        else:
            age = age - 2
elif xy == 4:
    age = 2022 - 2000 - yy
    if age == 0:
        age = 0
    else:
        if mm >= 6:
            if 1 - dd < 0:
                age = age -

In [101]:
# Load the saved result file as strings, indexed by the student-id column,
# using the same euc-kr encoding it is written with.
pp = pd.read_csv("plagiCheck.csv", dtype=str, index_col=cSTID, encoding="euc-kr")

In [6]:
# NOTE(review): ansTime is an st_mtime float (seconds since the epoch). Called
# without unit="s", the value is read as nanoseconds, which is why the
# recorded result is a timestamp in 1970 — confirm and pass unit="s" if a
# real date is wanted.
dt = pd.to_datetime(ansTime)

In [7]:
dt

Timestamp('1970-01-01 00:00:01.655443520')

In [8]:
ansTime

1655443520.0

In [9]:
dir(t)


NameError: name 't' is not defined

In [50]:
ll = pr.who

In [55]:
ll.values[0]

'김하루(2022240592)'

In [33]:
# Column names of the result table, unpacked into one constant per column,
# followed by an empty result frame with those columns.
PlagiCols = ('stid', 'pnum', 'name', 'program', 'who')
cSTID, cPNUM, cNAME, cPROGRAM, cWHO = PlagiCols
plagiFr = pd.DataFrame(columns=PlagiCols)

4