### Library Imports

In [114]:
from src.info_extractor import InfoExtractor
import os
import pandas as pd
from nltk.stem import WordNetLemmatizer, SnowballStemmer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.feature_extraction import DictVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.linear_model import LogisticRegression
import numpy as np
import shutil 

pd.options.display.max_columns = None

### Reading resume paths for Training Data

In [115]:
trainResumePathDictionary = {}
trainResumeSkillsDictionary = {}
trainY = []
# resumeBaseUrl = "src/data/test/resumes/testData/"
resumeBaseUrl = "src/data/test/resumes/"
processingSet = ['FE', 'BE', 'QA', 'DevOps']
# processingSet = ['FE', 'BE']
dataFrameDictionary = {}
# processingSet = ['QA']
try:
    for currentSet in processingSet:
        currentPath = resumeBaseUrl + currentSet
        trainResumePathDictionary[currentSet] = [os.path.join(currentPath, f) for f in os.listdir(currentPath) if os.path.isfile(os.path.join(currentPath, f))]
except:
    print('Error')
    pass
# JDPath = "src/data/test/JDs"
# JDs = [os.path.join(JDPath, f) for f in os.listdir(JDPath) if os.path.isfile(os.path.join(JDPath, f))]


### Utility Functions

In [116]:
resumeVectorizer = CountVectorizer()
testAlgo = LogisticRegression(solver='lbfgs', multi_class='auto')
def prepareResumeNameAsIndex(resumesList):
    indexes = {}
    for i in range(len(resumesList)):
        indexes[i] = resumesList[i].split("/")[len(resumesList[i].split("/")) - 1]
    return indexes

def prepareOutputClassesForTrainingSet(currentSet):
    if currentSet == 'FE':
         trainY.append(0)
    elif currentSet == 'BE':
        trainY.append(1)
    elif currentSet == 'QA':
        trainY.append(2)
    elif currentSet == 'DevOps':
        trainY.append(3)

def extractTrainingText(resumes, currentSet):
    countFilesRead = 0
    trainResumeSkillsDictionary[currentSet] = []
    tempSplittedTextForDataFrame = []
    tempSplittedTextContainerForDataFrame = []
    currentResumeDataFrame = {}
    for currentResume in resumes:
        countFilesRead += 1
        if countFilesRead % 100 == 0:
            print("Resumes Read for " + currentSet + " = " + str(countFilesRead))
        tempSplittedTextForDataFrame = InfoExtractor.extractSkills(currentResume)
        tempSplittedTextContainerForDataFrame.append(tempSplittedTextForDataFrame)
        individualResumeSkills = " ".join(tempSplittedTextForDataFrame)
        trainResumeSkillsDictionary[currentSet].append(individualResumeSkills)
        prepareOutputClassesForTrainingSet(currentSet)
    currentResumeDataFrame = pd.DataFrame(tempSplittedTextContainerForDataFrame)
    tempSplittedTextContainerForDataFrame = []
    tempSplittedTextForDataFrame = []
    currentResumeDataFrame.rename(index=prepareResumeNameAsIndex(trainResumePathDictionary[currentSet]), inplace=True)
    return currentResumeDataFrame

def trainDataSet():
    for currentSet in processingSet:
        dataFrameDictionary[currentSet] = extractTrainingText(trainResumePathDictionary[currentSet], currentSet)
        print('----------Extraction completed for dataset: ' + currentSet + '------------')
        
def fetchValuesForTraining(currentDataset):
    tempSkillsToTrainSet = []
    for currentSet in processingSet:
        tempSkillsToTrainSet += currentDataset[currentSet]
    return tempSkillsToTrainSet

def normalizeLanguageForMachine():
    Resume_Vector = []
    normalizedData = []
    
    skillsToTrain = fetchValuesForTraining(trainResumeSkillsDictionary)
    resumeVectorizer.fit(skillsToTrain)
    
    for text in skillsToTrain:
        vector = resumeVectorizer.transform([text])
        Resume_Vector.append(vector.toarray())
    

    for x in Resume_Vector:
        normalizedData.append(x[0])
        
    return normalizedData

def classifyResumesInFolders(source, destination):
    if not os.path.exists(destination.rsplit('/', 1)[0]):
        os.makedirs(destination.rsplit('/', 1)[0])
    dest = shutil.copyfile(source, destination) 

def classifyTestedResumes(testResumes, predictedResumes):
    resultDestinationBaseUrl = "src/data/result/resumes/"
    namesOnly = []
    predictedNames = []
    for i in range(len(testResumes)):
        namesOnly.append(testResumes[i].split("/")[len(testResumes[i].split("/")) - 1])
    for i in range(len(predictedResumes)):
        currentName = namesOnly[i].split("\\")[len(testResumes[i].split("\\")) - 1]
        if predictedResumes[i] == 0:
            classifyResumesInFolders(testResumes[i], resultDestinationBaseUrl + 'FE/' + currentName)
            predictedNames.append("Front End Resume")
        elif predictedResumes[i] == 1:
            classifyResumesInFolders(testResumes[i], resultDestinationBaseUrl + 'BE/' + currentName)
            predictedNames.append("Back End Resume")
        elif predictedResumes[i] == 2:
            classifyResumesInFolders(testResumes[i], resultDestinationBaseUrl + 'QA/' + currentName)
            predictedNames.append("QA Resume")
        elif predictedResumes[i] == 3:
            classifyResumesInFolders(testResumes[i], resultDestinationBaseUrl + 'DevOps/' + currentName)
            predictedNames.append("DevOps Resume")
    return {'Name':namesOnly, 'Results':predictedNames}
    

def testAndClassifyResumes():
    resumePathTest = "src/data/test/resumes/Test"
    testResumes = [os.path.join(resumePathTest, f) for f in os.listdir(resumePathTest) if os.path.isfile(os.path.join(resumePathTest, f))]
    skillsToTrainTest = []
    testResume = ""
    for testResume in testResumes:
        testSkills = InfoExtractor.extractSkills(testResume)
        skillsToTrainTest.append(" ".join(testSkills))
    newArrayToPredict = resumeVectorizer.transform(skillsToTrainTest).toarray()
    predictedResumes = testAlgo.predict(newArrayToPredict)
    return classifyTestedResumes(testResumes, predictedResumes)

def trainMachineLearningAlgorithm(normalizedDataForProcessing, trainY):
    trainX = np.array(normalizedDataForProcessing)
    trainY = np.array(trainY)
    trainY = trainY.reshape(-1, 1)
    testAlgo.fit(trainX, trainY)
    print(trainX.shape)
    print(trainY.shape)

#     "src/data/test/resumes/export_dataframe.csv"
def getTrainingDataFromCSV(file):
    trainingSetFromCSV = pd.read_csv(file)
    trainYFromFile = np.array(trainingSetFromCSV['outputClass']).reshape(-1,1)
    trainXFromFile = np.array(trainingSetFromCSV.drop(columns=['outputClass']).values.tolist())
    print(trainYFromFile.shape)
    print(trainXFromFile.shape)
    return trainXFromFile, trainYFromFile, trainingSetFromCSV

def normalizeDataAndWriteToFile(file):
    normalizedDataForProcessing = normalizeLanguageForMachine()
    TransformedResumesData = pd.DataFrame(normalizedDataForProcessing)
    TransformedResumesData = TransformedResumesData.join(pd.DataFrame({'outputClass': trainY}))
    print(TransformedResumesData.shape)
    TransformedResumesData.rename(index=prepareResumeNameAsIndex(fetchValuesForTraining(trainResumePathDictionary)), inplace=True)
    # TransformedResumesData.columns = resumeVectorizer.get_feature_names()
    print(TransformedResumesData.shape)
    export_csv = TransformedResumesData.to_csv (file, index = None, header=True)
    return normalizedDataForProcessing

### Train Resumes

In [117]:
trainDataSet()

Resumes Read for FE = 100
Resumes Read for FE = 200
Resumes Read for FE = 300
Resumes Read for FE = 400
Resumes Read for FE = 500
Resumes Read for FE = 600
Resumes Read for FE = 700
Resumes Read for FE = 800
Resumes Read for FE = 900
Resumes Read for FE = 1000
Resumes Read for FE = 1100
Resumes Read for FE = 1200
----------Extraction completed for dataset: FE------------
Resumes Read for BE = 100
Resumes Read for BE = 200
Resumes Read for BE = 300
Resumes Read for BE = 400
Resumes Read for BE = 500
Resumes Read for BE = 600
Resumes Read for BE = 700
Resumes Read for BE = 800
Resumes Read for BE = 900
Resumes Read for BE = 1000
Resumes Read for BE = 1100
Resumes Read for BE = 1200
----------Extraction completed for dataset: BE------------
Resumes Read for QA = 100
Resumes Read for QA = 200
Resumes Read for QA = 300
Resumes Read for QA = 400
Resumes Read for QA = 500
Resumes Read for QA = 600
Resumes Read for QA = 700
Resumes Read for QA = 800
Resumes Read for QA = 900
Resumes Read for Q

### Backend Resume Samples

In [118]:
dataFrameDictionary["BE"][:3]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36
BE\be-resume-1.pdf,Groovy,Svn,Java,Phoenix,Headless,Laravel,Api,Aws,Mongodb,Hibernate,Testing,Zap,Jsf,Databases,Production,Websphere,Restdb,Python,Oracle,Ruby,Less,Environments,Django,Cluster,Db,Environment,Sql,Php,Servlets,Postgresql,Webhook,Jdbc,Firebase,Rdbms,,,
BE\be-resume-10.pdf,Nosql,Java,Headless,Rest,Hadoop,Laravel,Flow,Digitalocean,Hibernate,Spring,Jsf,Websphere,Oracle,Ruby,Flask,Cluster,Db,Rxdb,Tomcat,Sql,Php,Netlify,Junit,Ajax,Postgresql,,,,,,,,,,,,
BE\be-resume-1000.pdf,Groovy,Mysql,Java,Phoenix,Headless,Dbms,Apollo,Laravel,Digitalocean,Aws,Api,Hibernate,Spring,Automation,Database,Jsf,Security,Restdb,Python,Ruby,Cisco,Flask,Rxdb,Db,Tomcat,Sql,Netlify,Servlets,Junit,Jdbc,Jms,Rdbms,,,,,


### Front End Resume Samples

In [119]:
dataFrameDictionary["FE"][:3]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51
FE\fe-resume-1.pdf,Flutter,Babel,Bson,Flow,Gatsbyjs,Node,Typescript,Carlo,Expressjs,Fetch,Jasmine,Emberjs,Karma,Gulp,Rxjs,Angularjs,Rollup,Http,Web,Json,Module,Ui,Wireframe,Emmet,Electron,Jshint,Uikit,Aria,Mobx,Pwa,Grunt,Async,Css,Cssom,Mocha,Ecmascript,Html,Js,Jquery,Scss,,,,,,,,,,,,
FE\fe-resume-10.pdf,Pnpm,Spa,Javascript,Babel,Bson,Svg,Angular,Webpack,Flow,Redux,Bem,Expressjs,Fetch,Mern,Emberjs,Karma,Ecmascript,Rollup,Angularjs,Web,Json,Ui,Wireframe,Polyfill,Riot,Vue,Bootstrap,Bom,Mobx,Pwa,Grunt,Jest,Cssom,Mocha,Yarn,Html,Js,,,,,,,,,,,,,,,
FE\fe-resume-1000.pdf,Pnpm,Spa,Reactjs,Javascript,Babel,Bson,Svg,Angular,Webpack,Node,Carlo,Expressjs,Jasmine,Materialize,Sockjs,Rxjs,Web,Module,Less,Ui,Emmet,Polyfill,Riot,Vue,Electron,Modernizer,Handlebars,Aria,Lighthouse,Bom,Mobx,Grunt,Jest,Css,Mocha,Js,Xss,Jquery,,,,,,,,,,,,,,


### QA Resume Samples

In [120]:
dataFrameDictionary["QA"][:5]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
QA\qa-resume-1.pdf,Smoke,Regression,Automated,Ui,Testing,,,,,,
QA\qa-resume-100.pdf,Smoke,Regression,Automation,Ui,Testing,,,,,,
QA\qa-resume-1002.pdf,Automated,Ui,Testing,Database,Administration,White,,,,,
QA\qa-resume-1003.pdf,Smoke,Ui,Testing,White,Black,,,,,,
QA\qa-resume-1004.pdf,Regression,Ui,Automation,Testing,Black,,,,,,


### DevOPs Resume Samples

In [121]:
dataFrameDictionary["DevOps"][:5]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44
DevOps\dop-resume-10.pdf,Postfix,Proxy,Wamp,Tcp,Xamp,Apache,Flow,Rhel,Automated,Voip,Firewall,Administration,Security,Networks,Wireless,Kibana,Deployment,Gateway,Elk,Lan,Rds,Environments,Centos,Cluster,Xen,Server,Build,Gcloud,Cpanel,Iam,Wan,Dns,Dhcp,Pcidss,,,,,,,,,,,
DevOps\dop-resume-100.pdf,Staging,Esxi,Windows,Xamp,Emr,Heart,Apache,Dynamodb,Compute,Devops,Security,Production,Wireless,Linux,Deployment,Gateway,Networking,Lan,Rds,Zabbix,Ops,Openvpn,Domain,Centos,Cluster,Xen,Tomcat,Environment,Nginx,Server,Gcloud,Infrastructure,Docker,Wan,Redmine,Virtual,,,,,,,,,
DevOps\dop-resume-1000.pdf,Esxi,Tcp,Windows,Nodes,Heart,Debian,Dynamodb,Amazon,Administration,Devops,Sqs,Networks,Elb,Linux,Wireless,Kibana,Vpn,Nagios,Networking,Rds,Cisco,Cluster,Redhat,Server,Nginx,Gcloud,Udp,Infrastructure,Ntp,Cpanel,Iam,Wan,Ssh,Hardware,Elastic,Virtual,,,,,,,,,
DevOps\dop-resume-1001.pdf,Proxy,Wamp,Tcp,Cloud,Heart,Vmware,Apache,Debian,Grafana,Automated,Firewall,Vpn,Wireless,Elb,Gateway,Networking,Rds,Environments,Centos,Redhat,Tomcat,Server,Nginx,Udp,Ntp,Cpanel,Wan,Virtualization,Hardware,Elastic,,,,,,,,,,,,,,,
DevOps\dop-resume-1003.pdf,Postfix,Wamp,Configuration,Tcp,Nodes,Emr,Cloud,Xamp,Vmware,Rhel,Voip,Dynamodb,Jboss,Devops,Production,Networks,Deployment,Gateway,Lan,Cisco,Openvpn,Cluster,Redhat,Azure,Server,Build,Ntp,Jenkins,Docker,Cpanel,Iam,Wan,Redmine,Virtualization,Elastic,Virtual,,,,,,,,,


### Conversion of Natural Language into Machine readable data

In [122]:
normalizedDataForProcessing = normalizeDataAndWriteToFile('src/data/test/resumes/training_data_for_resumes.csv')

(4890, 227)
(4890, 227)


### Machine Learning Algorithm Training

In [123]:
trainMachineLearningAlgorithm(normalizedDataForProcessing, trainY)

  y = column_or_1d(y, warn=True)


(4890, 226)
(4890, 1)


### Machine Learning Algorithm Testing

In [126]:
pd.DataFrame(testAndClassifyResumes())

Error in Reading File: src/data/test/resumes/Test\Resume - Front End Developer - SAAD BIN SAEED.pdf


Unnamed: 0,Name,Results
0,Test\(Fahad Ali) - (Java Developer) - Radtac T...,QA Resume
1,Test\(Hasham Rasheed) - (Java Developer) - Rad...,Back End Resume
2,Test\(Muhammad Farhan Iqbal) - (Java Developer...,QA Resume
3,Test\0_Usman-Ali-CV-NOV2018-converted.pdf,Front End Resume
4,Test\Abdul Basit Javed.pdf,DevOps Resume
...,...,...
122,Test\Zeeshan Manzoor 1.docx,DevOps Resume
123,Test\Zohaib Ahmed Hassan_DCE_Radtac.pdf,DevOps Resume
124,Test\Zohaib's Resume.pdf,Back End Resume
125,Test\Zubair Ashraf_DCE_Radtac.pdf,DevOps Resume


# Reading and Testing Trained Data from CSV

In [245]:
trainXFile, trainYFile, trainingDF = getTrainingDataFromCSV("src/data/test/resumes/training_data_for_resumes.csv")
trainingDF

(4890, 1)
(4890, 226)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,outputClass
0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0
2,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
3,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0
4,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4885,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3
4886,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,1,0,3
4887,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,3
4888,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3


In [225]:
trainMachineLearningAlgorithm(trainXFile, trainYFile)

(31, 148)
(31, 1)


  y = column_or_1d(y, warn=True)


In [226]:
pd.DataFrame(testAndClassifyResumes())

Unnamed: 0,Name,Results
0,Test\Abdul WahhabCV.pdf,Back End Resume
1,Test\Adnan.Ghafoor - Resume.pdf,Back End Resume
2,Test\Ahmad Farooq Cheema.docx,Back End Resume
3,Test\Azmar.pdf,Back End Resume
4,Test\Bilal Nawaz.pdf,Back End Resume
5,Test\CV-Bilal Mubarik-SQA Automation Engineer...,Back End Resume
6,Test\CV-QA-Yousaf.pdf,Back End Resume
7,Test\CV_Muhammad Osama-all2.docx,Back End Resume
8,Test\Faizan-resume-march-2019.docx,Back End Resume
9,Test\Farrukh Ehsan Resume SQA .docx,Back End Resume


In [83]:
arr = 'src/data/result/resumes/BE/Abdul WahhabCV.pdf'

In [84]:
arr.rsplit('/', 1)[0]

'src/data/result/resumes/BE'

In [113]:
os.makedirs('src/data/result/resumes/BE')