### Library Imports

In [25]:
from info_extractor import InfoExtractor
import os
import pandas as pd
from nltk.stem import WordNetLemmatizer, SnowballStemmer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.feature_extraction import DictVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.linear_model import LogisticRegression
import numpy as np
import shutil 

pd.options.display.max_columns = None

### Reading resume paths for Training Data

In [26]:
trainResumePathDictionary = {}
trainResumeSkillsDictionary = {}
trainY = []
resumeBaseUrl = "training-data/"
processingSet = ['FE', 'BE', 'QA', 'DevOps']
dataFrameDictionary = {}
try:
    for currentSet in processingSet:
        currentPath = resumeBaseUrl + currentSet
        trainResumePathDictionary[currentSet] = [os.path.join(currentPath, f) for f in os.listdir(currentPath) if os.path.isfile(os.path.join(currentPath, f))]
except:
    print('Error')
    pass


### Utility Functions

In [27]:
resumeVectorizer = CountVectorizer()
testAlgo = LogisticRegression(solver='lbfgs', multi_class='auto')
def prepareResumeNameAsIndex(resumesList):
    indexes = {}
    for i in range(len(resumesList)):
        indexes[i] = resumesList[i].split("/")[len(resumesList[i].split("/")) - 1]
    return indexes

def prepareOutputClassesForTrainingSet(currentSet):
    if currentSet == 'FE':
         trainY.append(0)
    elif currentSet == 'BE':
        trainY.append(1)
    elif currentSet == 'QA':
        trainY.append(2)
    elif currentSet == 'DevOps':
        trainY.append(3)

def extractTrainingText(resumes, currentSet):
    countFilesRead = 0
    trainResumeSkillsDictionary[currentSet] = []
    tempSplittedTextForDataFrame = []
    tempSplittedTextContainerForDataFrame = []
    currentResumeDataFrame = {}
    for currentResume in resumes:
        countFilesRead += 1
        if countFilesRead % 100 == 0:
            print("Resumes Read for " + currentSet + " = " + str(countFilesRead))
        tempSplittedTextForDataFrame = InfoExtractor.extractSkills(currentResume)
        tempSplittedTextContainerForDataFrame.append(tempSplittedTextForDataFrame)
        individualResumeSkills = " ".join(tempSplittedTextForDataFrame)
        trainResumeSkillsDictionary[currentSet].append(individualResumeSkills)
        prepareOutputClassesForTrainingSet(currentSet)
    currentResumeDataFrame = pd.DataFrame(tempSplittedTextContainerForDataFrame)
    tempSplittedTextContainerForDataFrame = []
    tempSplittedTextForDataFrame = []
    currentResumeDataFrame.rename(index=prepareResumeNameAsIndex(trainResumePathDictionary[currentSet]), inplace=True)
    return currentResumeDataFrame

def trainDataSet():
    for currentSet in processingSet:
        dataFrameDictionary[currentSet] = extractTrainingText(trainResumePathDictionary[currentSet], currentSet)
        print('----------Extraction completed for dataset: ' + currentSet + '------------')
        
def fetchValuesForTraining(currentDataset):
    tempSkillsToTrainSet = []
    for currentSet in processingSet:
        tempSkillsToTrainSet += currentDataset[currentSet]
    return tempSkillsToTrainSet

def normalizeLanguageForMachine():
    Resume_Vector = []
    normalizedData = []
    
    skillsToTrain = fetchValuesForTraining(trainResumeSkillsDictionary)
    resumeVectorizer.fit(skillsToTrain)
    
    for text in skillsToTrain:
        vector = resumeVectorizer.transform([text])
        Resume_Vector.append(vector.toarray())
    

    for x in Resume_Vector:
        normalizedData.append(x[0])
        
    return normalizedData

def classifyResumesInFolders(source, destination):
    if not os.path.exists(destination.rsplit('/', 1)[0]):
        os.makedirs(destination.rsplit('/', 1)[0])
    dest = shutil.copyfile(source, destination) 

def classifyTestedResumes(testResumes, predictedResumes):
    resultDestinationBaseUrl = "result/resumes/"
    namesOnly = []
    predictedNames = []
    for i in range(len(testResumes)):
        namesOnly.append(testResumes[i].split("/")[len(testResumes[i].split("/")) - 1])
    for i in range(len(predictedResumes)):
        currentName = namesOnly[i].split("\\")[len(testResumes[i].split("\\")) - 1]
        if predictedResumes[i] == 0:
            classifyResumesInFolders(testResumes[i], resultDestinationBaseUrl + 'FE/' + currentName)
            predictedNames.append("Front End Resume")
        elif predictedResumes[i] == 1:
            classifyResumesInFolders(testResumes[i], resultDestinationBaseUrl + 'BE/' + currentName)
            predictedNames.append("Back End Resume")
        elif predictedResumes[i] == 2:
            classifyResumesInFolders(testResumes[i], resultDestinationBaseUrl + 'QA/' + currentName)
            predictedNames.append("QA Resume")
        elif predictedResumes[i] == 3:
            classifyResumesInFolders(testResumes[i], resultDestinationBaseUrl + 'DevOps/' + currentName)
            predictedNames.append("DevOps Resume")
    return {'Name':namesOnly, 'Results':predictedNames}
    

def testAndClassifyResumes():
    resumePathTest = "test-resumes"
    testResumes = [os.path.join(resumePathTest, f) for f in os.listdir(resumePathTest) if os.path.isfile(os.path.join(resumePathTest, f))]
    skillsToTrainTest = []
    testResume = ""
    for testResume in testResumes:
        testSkills = InfoExtractor.extractSkills(testResume)
        skillsToTrainTest.append(" ".join(testSkills))
    newArrayToPredict = resumeVectorizer.transform(skillsToTrainTest).toarray()
    predictedResumes = testAlgo.predict(newArrayToPredict)
    return classifyTestedResumes(testResumes, predictedResumes)

def trainMachineLearningAlgorithm(normalizedDataForProcessing, trainY):
    trainX = np.array(normalizedDataForProcessing)
    trainY = np.array(trainY)
    trainY = trainY.reshape(-1, 1)
    testAlgo.fit(trainX, trainY)
    print(trainX.shape)
    print(trainY.shape)

#     "src/data/test/resumes/export_dataframe.csv"
def getTrainingDataFromCSV(file):
    trainingSetFromCSV = pd.read_csv(file)
    trainYFromFile = np.array(trainingSetFromCSV['outputClass']).reshape(-1,1)
    trainXFromFile = np.array(trainingSetFromCSV.drop(columns=['outputClass']).values.tolist())
    print(trainYFromFile.shape)
    print(trainXFromFile.shape)
    return trainXFromFile, trainYFromFile, trainingSetFromCSV

def normalizeDataAndWriteToFile(file):
    normalizedDataForProcessing = normalizeLanguageForMachine()
    TransformedResumesData = pd.DataFrame(normalizedDataForProcessing)
    TransformedResumesData = TransformedResumesData.join(pd.DataFrame({'outputClass': trainY}))
    print(TransformedResumesData.shape)
    TransformedResumesData.rename(index=prepareResumeNameAsIndex(fetchValuesForTraining(trainResumePathDictionary)), inplace=True)
    # TransformedResumesData.columns = resumeVectorizer.get_feature_names()
    print(TransformedResumesData.shape)
    export_csv = TransformedResumesData.to_csv (file, index = None, header=True)
    return normalizedDataForProcessing

### Train Resumes

In [28]:
trainDataSet()

Resumes Read for FE = 100
Resumes Read for FE = 200
Resumes Read for FE = 300
Resumes Read for FE = 400
Resumes Read for FE = 500
Resumes Read for FE = 600
Resumes Read for FE = 700
Resumes Read for FE = 800
Resumes Read for FE = 900
Resumes Read for FE = 1000
Resumes Read for FE = 1100
Resumes Read for FE = 1200
----------Extraction completed for dataset: FE------------
Resumes Read for BE = 100
Resumes Read for BE = 200
Resumes Read for BE = 300
Resumes Read for BE = 400
Resumes Read for BE = 500
Resumes Read for BE = 600
Resumes Read for BE = 700
Resumes Read for BE = 800
Resumes Read for BE = 900
Resumes Read for BE = 1000
Resumes Read for BE = 1100
Resumes Read for BE = 1200
----------Extraction completed for dataset: BE------------
Resumes Read for QA = 100
Resumes Read for QA = 200
Resumes Read for QA = 300
Resumes Read for QA = 400
Resumes Read for QA = 500
Resumes Read for QA = 600
Resumes Read for QA = 700
Resumes Read for QA = 800
Resumes Read for QA = 900
Resumes Read for Q

### Backend Resume Samples

In [29]:
dataFrameDictionary["BE"][:3]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36
BE\be-resume-1.pdf,Production,Hibernate,Zap,Environments,Ruby,Svn,Cluster,Environment,Less,Oracle,Restdb,Servlets,Sql,Websphere,Webhook,Postgresql,Php,Python,Django,Groovy,Databases,Phoenix,Testing,Mongodb,Rdbms,Aws,Jsf,Laravel,Headless,Java,Jdbc,Api,Db,Firebase,,,
BE\be-resume-10.pdf,Hadoop,Hibernate,Ajax,Ruby,Cluster,Flask,Oracle,Junit,Digitalocean,Rest,Sql,Websphere,Flow,Postgresql,Netlify,Php,Rxdb,Spring,Nosql,Tomcat,Jsf,Laravel,Headless,Java,Db,,,,,,,,,,,,
BE\be-resume-1000.pdf,Hibernate,Dbms,Apollo,Ruby,Database,Mysql,Flask,Digitalocean,Junit,Restdb,Servlets,Cisco,Sql,Netlify,Python,Rxdb,Groovy,Phoenix,Spring,Automation,Aws,Jsf,Laravel,Tomcat,Rdbms,Headless,Java,Jdbc,Api,Jms,Db,Security,,,,,


### Front End Resume Samples

In [30]:
dataFrameDictionary["FE"][:3]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51
FE\fe-resume-1.pdf,Async,Pwa,Expressjs,Gulp,Gatsbyjs,Http,Module,Angularjs,Fetch,Karma,Electron,Babel,Jquery,Css,Carlo,Flow,Typescript,Scss,Mocha,Rxjs,Json,Rollup,Jshint,Uikit,Ui,Html,Aria,Mobx,Ecmascript,Grunt,Emberjs,Emmet,Js,Flutter,Cssom,Bson,Web,Node,Jasmine,Wireframe,,,,,,,,,,,,
FE\fe-resume-10.pdf,Pwa,Expressjs,Riot,Angularjs,Fetch,Karma,Spa,Babel,Svg,Pnpm,Redux,Flow,Mocha,Json,Yarn,Rollup,Vue,Webpack,Bootstrap,Jest,Ui,Html,Mobx,Bem,Ecmascript,Emberjs,Grunt,Js,Angular,Javascript,Cssom,Bom,Bson,Mern,Polyfill,Web,Wireframe,,,,,,,,,,,,,,,
FE\fe-resume-1000.pdf,Handlebars,Expressjs,Module,Riot,Less,Materialize,Spa,Jquery,Electron,Svg,Pnpm,Css,Babel,Carlo,Mocha,Rxjs,Vue,Webpack,Jest,Ui,Xss,Aria,Mobx,Sockjs,Grunt,Emmet,Js,Angular,Modernizer,Javascript,Bom,Bson,Lighthouse,Reactjs,Polyfill,Web,Node,Jasmine,,,,,,,,,,,,,,


### QA Resume Samples

In [31]:
dataFrameDictionary["QA"][:5]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
QA\qa-resume-1.pdf,Automated,Ui,Smoke,Regression,Testing,,,,,,
QA\qa-resume-100.pdf,Ui,Smoke,Regression,Testing,Automation,,,,,,
QA\qa-resume-1002.pdf,Automated,Ui,Administration,Testing,Database,White,,,,,
QA\qa-resume-1003.pdf,Black,Ui,Smoke,Testing,White,,,,,,
QA\qa-resume-1004.pdf,Black,Ui,Regression,Testing,Automation,,,,,,


### DevOPs Resume Samples

In [32]:
dataFrameDictionary["DevOps"][:5]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44
DevOps\dop-resume-10.pdf,Tcp,Iam,Environments,Xen,Gateway,Cluster,Rds,Wan,Voip,Apache,Server,Elk,Wamp,Firewall,Build,Flow,Networks,Kibana,Rhel,Pcidss,Wireless,Lan,Proxy,Administration,Xamp,Postfix,Centos,Deployment,Automated,Cpanel,Gcloud,Dns,Dhcp,Security,,,,,,,,,,,
DevOps\dop-resume-100.pdf,Production,Networking,Linux,Xen,Gateway,Staging,Cluster,Windows,Rds,Environment,Wan,Zabbix,Esxi,Apache,Heart,Server,Redmine,Emr,Infrastructure,Wireless,Lan,Virtual,Ops,Openvpn,Compute,Xamp,Tomcat,Domain,Centos,Deployment,Dynamodb,Docker,Gcloud,Nginx,Devops,Security,,,,,,,,,
DevOps\dop-resume-1000.pdf,Tcp,Nodes,Networking,Linux,Iam,Hardware,Cluster,Windows,Wan,Rds,Esxi,Server,Heart,Cisco,Nagios,Networks,Elb,Kibana,Infrastructure,Sqs,Amazon,Wireless,Virtual,Ntp,Administration,Redhat,Dynamodb,Vpn,Cpanel,Gcloud,Debian,Ssh,Nginx,Elastic,Devops,Udp,,,,,,,,,
DevOps\dop-resume-1001.pdf,Tcp,Networking,Environments,Hardware,Gateway,Rds,Wan,Apache,Server,Heart,Wamp,Firewall,Grafana,Elb,Wireless,Proxy,Vmware,Ntp,Redhat,Tomcat,Centos,Automated,Vpn,Cloud,Cpanel,Debian,Virtualization,Elastic,Nginx,Udp,,,,,,,,,,,,,,,
DevOps\dop-resume-1003.pdf,Production,Tcp,Nodes,Gateway,Iam,Cluster,Voip,Wan,Redmine,Server,Wamp,Cisco,Build,Jboss,Networks,Emr,Jenkins,Rhel,Azure,Lan,Virtual,Vmware,Ntp,Openvpn,Redhat,Xamp,Postfix,Deployment,Dynamodb,Cloud,Docker,Cpanel,Virtualization,Elastic,Configuration,Devops,,,,,,,,,


### Conversion of Natural Language into Machine readable data

In [33]:
normalizedDataForProcessing = normalizeDataAndWriteToFile('training-data/training_data_for_resumes.csv')

(4890, 227)
(4890, 227)


### Machine Learning Algorithm Training

In [34]:
trainMachineLearningAlgorithm(normalizedDataForProcessing, trainY)

(4890, 226)
(4890, 1)


  y = column_or_1d(y, warn=True)


### Machine Learning Algorithm Testing

In [35]:
pd.DataFrame(testAndClassifyResumes())

Unnamed: 0,Name,Results
0,test-resumes\0_Usman-Ali-CV-NOV2018-converted.pdf,Front End Resume
1,test-resumes\Abdulhaq Shah.pdf,QA Resume
2,test-resumes\Adnan.Ghafoor - Resume (1).pdf,Back End Resume
3,test-resumes\Adnan.Ghafoor - Resume (2).pdf,Back End Resume


# Reading and Testing Trained Data from CSV

In [36]:
trainXFile, trainYFile, trainingDF = getTrainingDataFromCSV("training-data/training_data_for_resumes.csv")
trainingDF

(4890, 1)
(4890, 226)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,outputClass
0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,0,0,0,1,1,0,0,0,1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0
2,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
3,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0
4,0,0,0,1,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4885,1,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3
4886,1,0,1,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,1,0,1,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,1,0,3
4887,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,1,0,1,1,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,3
4888,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3


In [37]:
trainMachineLearningAlgorithm(trainXFile, trainYFile)

(4890, 226)
(4890, 1)


  y = column_or_1d(y, warn=True)


In [38]:
pd.DataFrame(testAndClassifyResumes())

Unnamed: 0,Name,Results
0,test-resumes\0_Usman-Ali-CV-NOV2018-converted.pdf,Front End Resume
1,test-resumes\Abdulhaq Shah.pdf,QA Resume
2,test-resumes\Adnan.Ghafoor - Resume (1).pdf,Back End Resume
3,test-resumes\Adnan.Ghafoor - Resume (2).pdf,Back End Resume


In [39]:
arr = 'src/data/result/resumes/BE/Abdul WahhabCV.pdf'

In [40]:
arr.rsplit('/', 1)[0]

'src/data/result/resumes/BE'

In [41]:
os.makedirs('src/data/result/resumes/BE')