## Notebook Content

This notebook performs exploratory data analysis on the text extracted from scanned PDF files.

## Import Libraries

In [1]:
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import pandas as pd
import sqlite3 as sql
from collections import Counter
from wordcloud import WordCloud
import re
from sklearn.feature_extraction.text import CountVectorizer ,TfidfVectorizer
import pickle
import matplotlib.pyplot as plt

### Helper Functions Used Throughout the Notebook

In [2]:
def lemmatize_verbs(words):
    """Reduce every token in ``words`` to its verb lemma.

    Returns the lemmas as a single space-joined string (not a list), in the
    same order as the input tokens.
    """
    verb_lemmatizer = WordNetLemmatizer()
    return ' '.join(verb_lemmatizer.lemmatize(token, pos='v') for token in words)

In [3]:
def only_word(list_, min_length=3):
    """
    Keep only the tokens that are long enough to count as words.

    Parameters
    ----------
    list_ : iterable of str
        Tokens to filter.
    min_length : int, optional
        Minimum number of characters a token needs in order to be kept.
        Defaults to 3, the threshold this notebook has always used.

    Returns
    -------
    str
        The surviving tokens joined by single spaces, in their original
        order. Note that this is a string, not a list; an empty input
        yields an empty string.
    """
    return ' '.join(token for token in list_ if len(token) >= min_length)

In [4]:
def clean_method(text):
    """Normalise one raw resume text into a space-separated string of cleaned tokens.

    Steps, in order:

    1. lower-case the text;
    2. replace line breaks with spaces (both real newline / carriage-return
       characters and the literal backslash-n / backslash-r sequences);
    3. strip URLs;
    4. collapse every run of a repeated character to a single character;
    5. replace everything that is not an ASCII letter with a space;
    6. drop tokens shorter than three characters (``only_word``);
    7. lemmatise the tokens as verbs (``lemmatize_verbs``);
    8. remove NLTK English stop words.

    Parameters
    ----------
    text : str
        Raw text of one document.

    Returns
    -------
    str
        The cleaned text, or the sentinel ``'d'`` when ``text`` has 100
        characters or fewer.
    """
    if len(text) <= 100:
        return 'd'
    # A set gives O(1) membership tests. The name also no longer shadows the
    # ``stopwords`` module imported at the top of the notebook.
    english_stop_words = set(nltk.corpus.stopwords.words('english'))
    text = text.lower()
    text = text.replace('\\n', ' ').replace('\\r', ' ')
    text = text.replace('\n', ' ').replace('\r', ' ')
    # Raw string: '\S' and '\s' are regex classes, not Python string escapes.
    text = re.sub(r'http\S+\s*', ' ', text)
    # NOTE(review): this also collapses legitimate double letters
    # ("career" -> "carer", "address" -> "adres"). It is left unchanged here
    # because the hand-curated stop-word CSV used later was built from the
    # collapsed spellings; revisit both together.
    text = re.sub(r'(.)\1+', r'\1', text)
    text = re.sub('[^a-zA-Z]+', ' ', text)
    text = only_word(text.split())
    text = lemmatize_verbs(text.split())
    return ' '.join(word for word in text.split() if word not in english_stop_words)

In [5]:
def get_freq_word(TEXT_list):
    """Count how often each whitespace-separated word occurs across all passed texts.

    Parameters
    ----------
    TEXT_list : iterable of str
        The documents whose words are counted.

    Returns
    -------
    pandas.DataFrame
        One row per distinct word: column ``0`` holds the word, column ``1``
        its count. Rows are sorted by count, highest first, and keep the
        index they had in first-seen order. An empty input returns an empty
        frame with the same two columns instead of raising ``KeyError``.
    """
    word_counts = Counter(word for text in TEXT_list for word in text.split())
    # Explicit column labels keep columns 0 and 1 present even with no rows,
    # so the sort below cannot fail on empty input.
    freq_df = pd.DataFrame(list(word_counts.items()), columns=[0, 1])
    # A stable sort is requested so the ordering of equal counts is deterministic.
    return freq_df.sort_values(1, ascending=False, kind='mergesort')

In [6]:
def remove_domain_stop_words(stop_words, data=None):
    """Remove domain specific stop words from the ``TEXT`` column of a dataframe.

    Parameters
    ----------
    stop_words : iterable of str
        Words to delete. Each one is matched literally and only as a whole
        word (``\\b`` boundaries on both sides).
    data : pandas.DataFrame, optional
        Frame with a ``TEXT`` column of strings. When omitted, the
        notebook-level ``df`` is used, which is what the original
        implementation always did.

    Returns
    -------
    pandas.DataFrame
        A copy of the input with every stop word replaced by an empty string
        in ``TEXT``. The input frame is not modified and the index is
        preserved. Surrounding spaces are left as they are.
    """
    source = df if data is None else data
    df_after = source.copy()
    # Escape each word so regex metacharacters are matched literally.
    escaped_words = [re.escape(str(word)) for word in stop_words if str(word)]
    if not escaped_words:
        return df_after
    # One compiled alternation replaces the rows x words loop of re.sub calls.
    pattern = re.compile(r'\b(?:' + '|'.join(escaped_words) + r')\b')
    # Column-level assignment does not depend on the index being 0..n-1 and
    # avoids the chained assignment `df_after.TEXT[i] = ...`.
    df_after['TEXT'] = df_after['TEXT'].map(lambda text: pattern.sub('', text))
    return df_after

In [7]:
def write_pickle(file_name, df_to_write):
    """Serialise ``df_to_write`` into the file ``file_name`` using pickle's highest protocol.

    The file is opened in binary write mode, so an existing file with the
    same name is overwritten.
    """
    with open(file_name, 'wb') as out_file:
        pickler = pickle.Pickler(out_file, protocol=pickle.HIGHEST_PROTOCOL)
        pickler.dump(df_to_write)

### Read Data from SQL

In [8]:
# Load every scanned document from the SQLite file. The connection is closed
# as soon as the frame is in memory, even if the query fails.
trader_connection = sql.connect('Data/Data_After_PDF_Scanning.db')
try:
    df = pd.read_sql('SELECT * FROM Data_After_PDF_Scanning', trader_connection)
finally:
    trader_connection.close()

### Drop Index Column

In [9]:
# errors='ignore' keeps this cell re-runnable once the column is already gone.
df = df.drop(columns='index', errors='ignore')

### Explore Data

In [10]:
## this step is to display all text in each row
pd.set_option('display.expand_frame_repr', False)
# None means "do not truncate cell contents"; the former -1 spelling is
# deprecated (see the warning it produced) and rejected by newer pandas.
pd.set_option('display.max_colwidth', None)

  pd.set_option('max_colwidth', -1)


In [11]:
df.sample(2, random_state = 8)

Unnamed: 0,TEXT,PATH
973,akash pralhad patil akash gmail com carer objective achieve dream life personal details adres kathora date birth gender male city chopda state maharashtra pin code nationality india marital status unmaried religion hindu languages known english hindi marathi hoby interested new technology projects hybrid power generation team member generation electrical power using solar and wind combination education details degre course institution university board cgpa percenta year pasing diploma electrical enginering smt patil inst technology chopda msbte asc colege nashik declaration have declared al the statements made the aplication are true complete corect the best knowledge and belief date place chopda akash pralhad patil powered tcpdf w tcpdf org,/Users/shahadsulaiman/Desktop/nlp_project/pdf/2224.pdf
1087,tam we jie personal particulars carer objective have years experience charge ful set acounts singapore healthcare and venture capital fund industry here seking senior acounts executive payrol position permanent contract where can use experience contribute analytical skil the company that wil give oportunity develop carer work experiences world aset management pte ltd may jul industry investment company profile world aset management pte ltd the parent company singapore based world real estate investment trust reit which received suport from both retail and institutional investors for its public ofering world reit the first chinese specialised logistics and comerce logistics reit listed the sgx and wil focusing investments diversified portfolio income producing real estate primarily used for suply chain management and logistics purposes position title senior acount executive months contract acount receivable acount payable bank reconciliatio prepare variances analysis for month end trial balance reason leave non renewable contract job fortune capital management pte ltd nov feb industry investment company profile established singapore the company enlarged scope networking and busines partners asia including countries the asean region focuses high tech investments specificaly software information technology and the internet areas which singapore companies have strong domain knowledge and core competency fortune singapore provides valuable platform for investment companies enter new markets greater china usa and asean position title senior acount executive acount receivable acount payable bank reconciliation monitor company wekly cashflow prepare profit and los statement statement financial position report financial report directors gender male race chinese age years old marital status single nationality malaysian citizen singapore mobile email adres elvisvjtam gmail com home adres ang kio page quarterly gst submision iras prepare witholding tax form asist anual 
budgeting reason leave company closed singapore medical group limited aug sep industry healthcare company profile incorporated singapore medical group smg private specialist healthcare provider with network specialties smg has total clinics strategicaly located paragon medical centres with growing network asociates clinics acros singapore and listed the singapore stock exchange sgx since position title acount executive acount receivable acount payable bank reconciliation others prepare month end closing inventory report maintain fixed aset register update audit schedule prepare quarterly gst report reason leave beter exposure gain experience from fund investment company educational background london schol busines finance aca level jan dec advanced diploma aplied acounting london chamber comerce and industry lci level i jan dec diploma computerized acounting tunku abdul rahman colege kuala lumpur jun may diploma international busines skils proficiency proficiency sap intermediate ofice intermediate myob intermediate excel intermediate acpac intermediate language skils english chinese malay aditional personality integrity analytical organized positive work atitude able work independent god leadership able think out box and pleasant personality last drawn expected salary availability aug kindly sms email there pick the cal thank you page,/Users/shahadsulaiman/Desktop/nlp_project/pdf/EC World_Tam Wee Jie_Sr Accounts Exec.pdf


In [12]:
df.head(2)

Unnamed: 0,TEXT,PATH
0,retnawati pasir ris singapore retna gmail com objective obtain more exposure chalenging carer oportunity with wel established organization that requires tea player that self motivated enthusiastic and able make continuous positive contribution the organization education bachelor degre comerce the university wolongong australia aug aug major finance atained high distinctions acounting organisations principles responsible comerce investment analysis and international finance significant projects simulation socialy inovative enterprise analyse causes and efects busines decisions graduate with distinction august diploma busines information technology nanyang polytechnics singapore apr apr achiev overal cumulativ gpa final year project created social media platform using java programing langua and made busines service centre report member international student club nanyang polytechnics bartley secondary schol singapore jan dec apointed vice president ibrary club bartley secondary schol organise various events outdor and indor xperience truston aset management singapore pte ltd fund acountant nov present asist and review reconciliation funds funds adminis trator client nvestor reporting and servicing prepar fund transfers instructions upload and updat hedge fund database monthly comunicat clients and other busines parties resolve any open isues ide ntify exceptions and problems fecting acounting recor and prices comunicating same management and asisting their resolution proces daily trades and setlement with various brokers calculate the net aset values various clients funds eview data for and report any iregularities the fun activity price portfolio securities acordance with aplicable valuation policy required proces payment fund expenses aplicable banking system acord ance with organization controls cordinate resources ensure al work caried out and sla met day day basi prepare cash projection ensure suficient fund the portfolio prepare wekly and monthly newsleter for 
the investors come the main contact person for finance and operations citco fund services singapore operation analyst nov october prepared daily trades position and cash reconciliation compiled nav pack for the client monitor and analysed daily portfolio pricing resolved discrepancies for reconciliation items breaks betwen citco and brokers clients comunicated with client and brokers through emails cals resolve any open isues provided training junior new hires the team partic ipated the monthly team meting and pre onth end planing reviewing over the counter otc master agrements and confirmations liais with various departments within citco for hoc projects such data conectivity and system set responsible for being the ain contact person for fun reconciliations analysed profit and los for the fund the preparation the monthly nav pack suport the conversion any new clients and existing migrations suported other busines units during peak periods ibm singapore singapore sal team internship aug nov reduced the numbers non responded oportunities caled and aproached every client solve problems helped the team solve daily queries did data entries with details required trained new coleague compet encies computers databases aplications paladyne mysql aces blomberg windows axi investor aexeo ofice word powerpoint excel aces languages spoken writen english excelent bahasa indonesia excelent bahasa melayu excele teochew excelent mandarin basic personal nationality atributes notice period indonesian singapore strong analytical god leader and team player god interpersonal and omunication skils decision making and problem solvin skils work wel under presure systematic and highly detailed oriented one month,/Users/shahadsulaiman/Desktop/nlp_project/pdf/Truston AM_Retnawati_FA_Hedge.pdf
1,curiculam vitae hirok jyoti borah instrumentation enginering mail borahirokjyoti gmail com mobile carer objective loking for god position prestigious company where previous skils and capabilities can put eficient use poses efective organiz ational skils and the ability supervise and developed the final output personal details father name prodip borah permanent aders vil bamun pukhuri pachauni gaon bamun pukhuri pin dist jorhat state asam date birt religion hindu nationalities indian marit status unmaried sex male language known asamese english hindi experience curent employe techno canada inc project surface wel testing client oil india limited designation daq perator job responsibilities swt equipments operating presure monitoring temperature monitoring and writen field reading shet field analysis colecting liquid sample from data header analysis analysis salinity analysis using reflect meter liquid flow rate using dep stick api analysis al instrumentation conection rigs swt equipments rig and down swt equipments loading and of loading preparing inventory swt equipments and spare maintain daily progres report previous employe dreser rand india pvt ltd client oil and natural gas corporation nazira asam project operation and maintenance gas compresor designation instrumentation technician working period from job responsibilities calibration various instruments like presure gaug presure transmiter presure switch pneumatic controler main tain repair logic control system used gas compresor plant checking and stop types pump acording the case emergency conection various field instru ments from haz ardous area control panel maintain daily progres report aprenticeship certificate one year aprenticeship training indian oil corporation limited education qualification examination board colege institute grade year pasing diploma instrumentation scte hrh the prince wales institute enginering and technology hslc seba bamun pukhuri high schol hslc science ahsec schol strength 
optimistic energetic with the building liaison and leadership enjoy responsibility and like acept new chalenges declaration declare that al the statements made the above aplication are true and corect the best knowledge place jorhat with regard hirok jyoti borah,/Users/shahadsulaiman/Desktop/nlp_project/pdf/1782.pdf


In [13]:
df.tail(2)

Unnamed: 0,TEXT,PATH
1532,education rice iversity houst texas master science statistics cumulative gpa pected coursew ork advanced statistical method financial time series data analysis machine learning gre quan titati verbal university linois rbana champaign urban linois may bachel ience tural resource environ mental ciences major gpa coursework aplied statistical methods natural resource economics gis natural resource management sat math writing verbal experien course rice university houston fal quantitative financial analytics conducted extensive research and analysis financial statements available sec filings such devised and compared various portfolio selection strategies through fundamental analysis with years historical data from wrds with the best strategy outperforming the benchmark percent based cagr rice iversity houst sumer research asistant prepared and alyzed pulation census data using sas and excel and performed poison gresio alyses the evalence rate hepatitis with respe age hnicity and liver cancer inciden rate conducted hypothesis testing heavy metal polution data gathered houston ship chanel and presented findings the city houston environmental council mini stry environmen satel lite vironment center beijin china sumer gis data analy utilized arcgis and excel analyze geographical data and created individual maps and reports using both ground census data and satelite image data over the past years the national nature reserves maintained and updated natural nature reserve database information for improved acuracy and eficiency using sql and excel asisted with the trans lation the ok state the nat ions cosystems chinese which contained over words course champaign sumer campus mbasador overs campa ign resulted ver uploads the course hero udy latform and donation boks for africa through the course hero knowledge drive movement leveraged viral marketing and social media facilitate significant increases active user trafic and initiated and established partnership with campus 
organization and profesors earned the course hero hon rol during the mon october outstanding performan and highes number oks donated ils act ivities interests technic skils arcgis blomberg jmp matlab python sas sql language fluent mandarin glish activities interests swimin cokin video gaming poker,/Users/shahadsulaiman/Desktop/nlp_project/pdf/3198.pdf
1533,mohamed fahim sulthan mechanical enginer contact email mfahim gmail com skype mfahim hot mail com dubai uae carer objective obtain job mechanical enginer chalenging environment and utilize skils that suports organization growth profile mechanical enginer with year experience mechanical system for the detailed design execution hvac system poses depth knowledge proposal and planing detailed hvac design per ashrae codes hvac load calculation ventilation calculation ducting and piping layout creating drawings isuing boq site supervision quantity surveying biling verification cordination field enginering profesional experience poja aircons lp daikin dealer chenai india worked hvac enginer from march months projects enginering and technical asistance procurement designing aprox tons refrigeration capacity consisting vrv units with ducted air handling units fan coil units pmi enginering exports pvt chenai india enginering and technical asistance procurement designing aprox tons refrigeration capacity consisting vrv units with cei ling mounted ducted units ceiling mounted casete units wal mounted split units sundaramorthy residency ranipet india enginering and technical asistance procurement designing aprox tons refrigeration capacity consisting vrv units with ceiling mounted ducted units ceiling mounted casete units wal mounted split units bajaj finance raheja towers chenai enginering and technical asistance procurement designing aprox tons refrigeration capacity consisting vrv units with duct routed air handling units fan coil units pmi global solutions chenai india role preparation data shets specification and material requisition for hvac system such vrv ahu package units ducts pumps lves exhaust fans etc review and provide hvac input electrical civil and instrumentation department for cordination designing site initial period and change the design duct rot way per customer requirements review vendor drawings eply vendor queri folow with vendors etc planing weks 
lok ahead months material enginering such propose and update earned value wekly basis finalize upcoming activities identify forthcoming mising requirements manpower material design report performance dev iations site performance against planed performance site supervision quantify al tasks set duration depending productivity requirement per planing quantity surveying biling verification inspecting site progres for work produced contractor sub contractor and initiate rfi with and inspect with client for al erection works per client aproved itp inspection test plan overseing quality control and safety procedures site preparing test packages for hyd test pipe rks internship plant operator traine from january may months valuthur gas power station ramnad india role study gas turbines boiler pumps heat exchangers plant safety monitoring and operation water treatment pumps firefighting pumps periodical inspections pumps check valves gate valves safety valves pipe lines sumps reporting presure readings and wekly records asistant plant enginer area interest hvac fire fighting manufacturing academic qualification bachelor degre tech mechanical enginering june abdur rahman university chenai diploma diploma mechanical enginering dme april mohamed sathak polytechnic colege kilakarai dote technical skils software mech autocad hap mcquay duct sizer creo ansys and cad utility software microsoft ofice word excel and powerpoint moviemaker and adobe photoshop achievements ishrae student membership certificate membership certificate achievement automobile enginering cicp english language asesment test certificate cambridge university academic projects strength improvement tig dresed mild stel design and fabrication pneumatic backhoe design and fabrication auto cradle strength lingual english tamil dedication work highly comunicative and interpersonal skils personal information national ity shahul hamed indian date birth marital status single permanent adres pasport visa status 
athiyilai stret kilakarai ramnad visit visa hereby declare that above mentioned information corect and best knowledge yours sincerely mohamed fahim sulthan,/Users/shahadsulaiman/Desktop/nlp_project/pdf/569.pdf


In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1534 entries, 0 to 1533
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   TEXT    1534 non-null   object
 1   PATH    1534 non-null   object
dtypes: object(2)
memory usage: 24.1+ KB


In [15]:
#Checking for nulls
df.isnull().sum()

TEXT    0
PATH    0
dtype: int64

In [16]:
#Checking for duplicates
print(df.duplicated().sum())

0


In [17]:
df.shape

(1534, 2)

### Start Resume Text Cleaning

In [18]:
# Normalise every document; clean_method returns the sentinel 'd' for texts too short to keep
df['TEXT'] = df.TEXT.apply(clean_method)

In [19]:
# Remove the rows flagged with the 'd' sentinel (text too short to keep), then
# renumber the rows so positional and label-based access agree in later cells.
df = df[df.TEXT != 'd'].reset_index(drop=True)

In [20]:
df.head(2)

Unnamed: 0,TEXT,PATH
0,retnawati pasir ris singapore retna gmail com objective obtain exposure chalenging carer oportunity wel establish organization require tea player self motivate enthusiastic able make continuous positive contribution organization education bachelor degre comerce university wolongong australia aug aug major finance atained high distinctions acounting organisations principles responsible comerce investment analysis international finance significant project simulation socialy inovative enterprise analyse cause efects busines decisions graduate distinction august diploma busines information technology nanyang polytechnics singapore apr apr achiev overal cumulativ gpa final year project create social media platform use java program langua make busines service centre report member international student club nanyang polytechnics bartley secondary schol singapore jan dec apointed vice president ibrary club bartley secondary schol organise various events outdor indor xperience truston aset management singapore pte ltd fund acountant nov present asist review reconciliation fund fund adminis trator client nvestor report service prepar fund transfer instructions upload updat hedge fund database monthly comunicat clients busines party resolve open isues ide ntify exceptions problems fecting acounting recor price comunicating management asisting resolution proces daily trade setlement various broker calculate net aset value various clients fund eview data report iregularities fun activity price portfolio securities acordance aplicable valuation policy require proces payment fund expense aplicable bank system acord ance organization control cordinate resources ensure work caried sla meet day day basi prepare cash projection ensure suficient fund portfolio prepare wekly monthly newsleter investors come main contact person finance operations citco fund service singapore operation analyst nov october prepare daily trade position cash reconciliation compile nav pack client monitor 
analyse daily portfolio price resolve discrepancies reconciliation items break betwen citco broker clients comunicated client broker email cals resolve open isues provide train junior new hire team partic ipated monthly team meting pre onth end plan review counter otc master agrements confirmations liais various departments within citco hoc project data conectivity system set responsible contact person fun reconciliations analyse profit los fund preparation monthly nav pack suport conversion new clients exist migrations suported busines units peak periods ibm singapore singapore sal team internship aug nov reduce number non respond oportunities caled aproached every client solve problems help team solve daily query data entries detail require train new coleague compet encies computers databases aplications paladyne mysql ace blomberg windows axi investor aexeo ofice word powerpoint excel ace languages speak writen english excelent bahasa indonesia excelent bahasa melayu excele teochew excelent mandarin basic personal nationality atributes notice period indonesian singapore strong analytical god leader team player god interpersonal omunication skils decision make problem solvin skils work wel presure systematic highly detail orient one month,/Users/shahadsulaiman/Desktop/nlp_project/pdf/Truston AM_Retnawati_FA_Hedge.pdf
1,curiculam vitae hirok jyoti borah instrumentation enginering mail borahirokjyoti gmail com mobile carer objective loking god position prestigious company previous skils capabilities put eficient use pose efective organiz ational skils ability supervise develop final output personal detail father name prodip borah permanent aders vil bamun pukhuri pachauni gaon bamun pukhuri pin dist jorhat state asam date birt religion hindu nationalities indian marit status unmaried sex male language know asamese english hindi experience curent employe techno canada inc project surface wel test client oil india limit designation daq perator job responsibilities swt equipments operate presure monitor temperature monitor writen field read shet field analysis colecting liquid sample data header analysis analysis salinity analysis use reflect meter liquid flow rate use dep stick api analysis instrumentation conection rig swt equipments rig swt equipments load load prepare inventory swt equipments spare maintain daily progres report previous employe dreser rand india pvt ltd client oil natural gas corporation nazira asam project operation maintenance gas compresor designation instrumentation technician work period job responsibilities calibration various instrument like presure gaug presure transmiter presure switch pneumatic controler main tain repair logic control system use gas compresor plant check stop type pump acording case emergency conection various field instru ments haz ardous area control panel maintain daily progres report aprenticeship certificate one year aprenticeship train indian oil corporation limit education qualification examination board colege institute grade year pasing diploma instrumentation scte hrh prince wales institute enginering technology hslc seba bamun pukhuri high schol hslc science ahsec schol strength optimistic energetic build liaison leadership enjoy responsibility like acept new chalenges declaration declare statements make aplication true 
corect best knowledge place jorhat regard hirok jyoti borah,/Users/shahadsulaiman/Desktop/nlp_project/pdf/1782.pdf


### Domain Specific Stopwords

In [21]:
freq_words = get_freq_word(list(df.TEXT))

In [22]:
freq_words.columns = ['WORD', 'COUNT']
freq_words.head(20)

Unnamed: 0,WORD,COUNT
171,work,4568
104,management,4338
321,company,3859
48,project,3688
503,financial,3462
107,fund,3295
83,report,3145
210,team,2975
56,busines,2844
81,service,2700


In [23]:
freq_words.to_csv('freq_words.csv')

After deciding which of the frequent words are domain-specific stop words (marked by hand in the `DELETE` column of the exported CSV), we remove them from the dataset.

In [24]:
words_df = pd.read_csv('freq_words_after_delete_col.csv')

In [25]:
# Drop the index column that to_csv wrote out. Rebinding with errors='ignore'
# (instead of an in-place drop) keeps the cell re-runnable.
words_df = words_df.drop(columns='Unnamed: 0', errors='ignore')

In [26]:
# Every missing value becomes 0, so unmarked rows read as DELETE == 0
words_df = words_df.fillna(0)

In [27]:
words_df

Unnamed: 0,WORD,COUNT,DELETE
0,work,4568,1.0
1,management,4338,0.0
2,company,3859,1.0
3,project,3688,1.0
4,financial,3462,0.0
...,...,...,...
50943,baid,1,0.0
50944,vyapar,1,0.0
50945,aur,1,0.0
50946,desh,1,0.0


In [28]:
# .copy() makes this an independent frame, so later modifications do not
# raise SettingWithCopyWarning or touch words_df.
domain_stop_words_df = words_df[words_df.DELETE == 1].copy()

In [29]:
# Rebind instead of dropping in place: an in-place drop on a filtered slice
# is what triggered the SettingWithCopyWarning.
domain_stop_words_df = domain_stop_words_df.drop(columns='DELETE')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [30]:
domain_stop_words_df

Unnamed: 0,WORD,COUNT
0,work,4568
2,company,3859
3,project,3688
6,report,3145
7,team,2975
...,...,...
197,participate,539
199,feb,536
201,oct,527
204,one,524


In [31]:
df_after = remove_domain_stop_words(list(domain_stop_words_df.WORD))

Check a few rows to confirm that the domain stop words were removed correctly.

In [32]:
df_after.head(2)

Unnamed: 0,TEXT,PATH
0,retnawati pasir ris retna objective obtain exposure chalenging oportunity establish organization require tea player self motivate enthusiastic able continuous positive contribution organization degre comerce wolongong australia finance atained distinctions acounting organisations principles comerce investment analysis finance significant simulation socialy inovative enterprise analyse cause efects busines decisions graduate distinction august diploma busines information technology nanyang polytechnics apr apr achiev overal cumulativ gpa final create social media platform java program langua busines service centre student club nanyang polytechnics bartley secondary apointed vice president ibrary club bartley secondary organise events outdor indor xperience truston management pte fund acountant asist reconciliation fund fund adminis trator client nvestor service prepar fund transfer instructions upload updat hedge fund database comunicat clients busines party resolve open ide ntify exceptions problems fecting acounting recor price comunicating management asisting resolution trade setlement broker calculate net value clients fund eview data iregularities fun activity price securities acordance aplicable valuation policy require payment fund expense aplicable system acord ance organization control cordinate resources ensure caried sla meet basi cash projection ensure suficient fund wekly newsleter investors come main contact person finance operations citco fund service operation analyst october trade cash reconciliation compile nav pack client monitor analyse price resolve discrepancies reconciliation items break betwen citco broker clients comunicated client broker cals resolve open junior hire partic ipated pre onth end counter otc master agrements confirmations liais departments within citco hoc data conectivity system contact person fun reconciliations analyse profit los fund nav pack suport conversion clients exist migrations suported busines units peak periods 
ibm sal internship reduce number non respond oportunities caled aproached every client solve problems help solve query data entries require coleague compet encies computers databases aplications paladyne mysql ace blomberg windows axi investor aexeo ofice word powerpoint excel ace speak writen english excelent bahasa indonesia excelent bahasa melayu excele teochew excelent mandarin basic nationality atributes notice period indonesian strong analytical leader player interpersonal omunication decision problem solvin presure systematic highly orient month,/Users/shahadsulaiman/Desktop/nlp_project/pdf/Truston AM_Retnawati_FA_Hedge.pdf
1,curiculam vitae hirok jyoti borah instrumentation enginering mail borahirokjyoti objective loking prestigious previous capabilities put eficient pose efective organiz ational ability supervise develop final output father prodip borah permanent aders vil bamun pukhuri pachauni gaon bamun pukhuri pin dist jorhat state asam birt religion hindu nationalities marit unmaried sex male language know asamese english hindi curent employe techno canada inc surface client oil designation daq perator swt equipments operate presure monitor temperature monitor writen field read shet field analysis colecting liquid sample data header analysis analysis salinity analysis reflect meter liquid flow rate dep stick api analysis instrumentation conection rig swt equipments rig swt equipments load load inventory swt equipments spare maintain progres previous employe dreser rand pvt client oil natural gas corporation nazira asam operation maintenance gas compresor designation instrumentation technician period calibration instrument like presure gaug presure transmiter presure switch pneumatic controler main tain repair logic control system gas compresor check stop type pump acording case emergency conection field instru ments haz ardous area control panel maintain progres aprenticeship aprenticeship oil corporation qualification examination grade pasing diploma instrumentation scte hrh prince wales enginering technology hslc seba bamun pukhuri hslc science ahsec strength optimistic energetic build liaison leadership enjoy responsibility like acept chalenges declaration declare statements aplication true corect jorhat regard hirok jyoti borah,/Users/shahadsulaiman/Desktop/nlp_project/pdf/1782.pdf


In [33]:
# Peek at the last two rows of the cleaned frame (TEXT with its source PATH).
df_after.tail(n=2)

Unnamed: 0,TEXT,PATH
1532,rice iversity houst texas master science statistics cumulative gpa pected coursew ork advance statistical method financial series data analysis machine learn gre quan titati verbal linois rbana champaign urban linois bachel ience tural resource environ mental ciences gpa coursework aplied statistical methods natural resource economics gi natural resource management sit math write verbal experien rice houston fal quantitative financial analytics extensive research analysis financial statements available sec file devise compare selection strategies fundamental analysis historical data wrds strategy outperform benchmark percent cagr rice iversity houst sumer research asistant alyzed pulation census data sas excel poison gresio alyses evalence rate hepatitis respe age hnicity liver cancer inciden rate hypothesis heavy metal polution data gather houston ship chanel find city houston environmental council mini stry environmen satel lite vironment center beijin sumer gi data analy utilize arcgis excel analyze geographical data create individual map grind census data satelite image data past nature reserve maintain natural nature reserve database information improve acuracy eficiency sql excel asisted trans lation state nat ions cosystems chinese contain word champaign sumer campus mbasador overs campa ign result ver upload hero udy latform donation boks africa hero drive movement leverage viral market social media facilitate significant increase active user trafic initiate establish partnership campus organization profesors earn hero hon rol mon october outstanding performan highes number oks donate ils act ivities technic arcgis blomberg jmp matlab python sas sql language fluent mandarin glish swimin cokin video game poker,/Users/shahadsulaiman/Desktop/nlp_project/pdf/3198.pdf
1533,mohamed fahim sulthan mechanical enginer contact mfahim skype mfahim hot mail dubai uae objective obtain mechanical enginer chalenging environment utilize suports organization growth profile mechanical enginer mechanical system design execution hvac system pose depth proposal hvac design ashrae cod hvac load calculation ventilation calculation ducting pip layout create draw isuing boq supervision quantity survey biling verification cordination field enginering poja aircons daikin dealer chenai hvac enginer march months enginering technical asistance procurement design aprox tons refrigeration capacity consist vrv units ducted air units fan coil units pmi enginering export pvt chenai enginering technical asistance procurement design aprox tons refrigeration capacity consist vrv units cei ling mount ducted units ceiling mount casete units wal mount split units sundaramorthy residency ranipet enginering technical asistance procurement design aprox tons refrigeration capacity consist vrv units ceiling mount ducted units ceiling mount casete units wal mount split units bajaj finance raheja tower chenai enginering technical asistance procurement design aprox tons refrigeration capacity consist vrv units duct rout air units fan coil units pmi solutions chenai role data shets specification material requisition hvac system vrv ahu package units ducts pump lves exhaust fan hvac input electrical civil instrumentation cordination design initial period change design duct rot way customer requirements vendor draw eply vendor queri folow vendors weks lok ahead months material enginering propose earn value wekly basis finalize upcoming identify forthcoming mising requirements manpower material design performance dev iations performance performance supervision quantify task duration depend productivity requirement quantity survey biling verification inspect progres produce contractor sub contractor initiate rfi inspect client erection client aproved itp inspection oversee 
quality control safety package hyd pipe rks internship operator traine january months valuthur gas power station ramnad role gas turbines boiler pump heat exchangers safety monitor operation water treatment pump firefighting pump periodical inspections pump check valves gate valves safety valves pipe line sumps presure read wekly asistant enginer area hvac fire fight manufacture academic qualification degre tech mechanical enginering abdur rahman chenai diploma diploma mechanical enginering dme mohamed sathak polytechnic kilakarai dote technical software mech autocad hap mcquay duct sizer creo ansys cad utility software microsoft ofice word excel powerpoint moviemaker adobe photoshop achievements ishrae student membership membership achievement automobile enginering cicp english language asesment cambridge academic strength improvement tig dresed mild stel design fabrication pneumatic backhoe design fabrication auto cradle strength lingual english tamil dedication highly comunicative interpersonal information ity shahul ham birth marital single permanent pasport visa athiyilai stret kilakarai ramnad visit visa hereby declare mention information corect sincerely mohamed fahim sulthan,/Users/shahadsulaiman/Desktop/nlp_project/pdf/569.pdf


In [34]:
# Spot-check two randomly chosen rows; the fixed seed keeps the pick reproducible.
df_after.sample(n=2, random_state=8)

Unnamed: 0,TEXT,PATH
973,akash pralhad patil akash objective achieve dream life kathora birth gender male city chopda state maharashtra pin code nationality marital unmaried religion hindu know english hindi marathi hoby technology hybrid power generation generation electrical power solar wind combination degre institution cgpa percenta pasing diploma electrical enginering smt patil inst technology chopda msbte asc nashik declaration declare statements aplication true corect belief chopda akash pralhad patil power tcpdf tcpdf org,/Users/shahadsulaiman/Desktop/nlp_project/pdf/2224.pdf
1087,tam jie particulars objective charge ful acounts healthcare venture fund industry seking acounts executive payrol permanent contract contribute analytical skil wil give oportunity develop world management pte industry investment profile world management pte parent world real estate investment reit receive suport retail institutional investors public ofering world reit first chinese specialise logistics comerce logistics reit sgx wil focus investments diversify income produce real estate primarily suply chain management logistics purpose title acount executive months contract acount receivable acount payable reconciliatio variances analysis month end trial balance reason leave non renewable contract fortune management pte industry investment profile establish enlarge scope network busines partner countries asean region focus tech investments specificaly software information technology internet areas strong domain core competency fortune valuable platform investment enter market greater usa asean title acount executive acount receivable acount payable reconciliation monitor wekly cashflow profit los statement statement financial financial directors gender male race chinese age old marital single nationality malaysian citizen elvisvjtam home ang kio page quarterly gst submision iras witholding tax form asist budget reason leave close medical industry healthcare profile incorporate medical smg specialist healthcare provider network specialties smg total clinics strategicaly locate paragon medical centre grow network asociates clinics acros stock exchange sgx since title acount executive acount receivable acount payable reconciliation others month end close inventory maintain fix register audit schedule quarterly gst reason leave beter exposure gain fund investment educational background london busines finance aca advance diploma aplied acounting london chamber comerce industry lci diploma computerize acounting tunku abdul rahman kuala lumpur diploma busines 
proficiency proficiency sap intermediate ofice intermediate myob intermediate excel intermediate acpac intermediate language english chinese malay aditional personality integrity analytical organize positive atitude able independent leadership able think box pleasant personality last draw expect salary availability kindly sms pick cal thank page,/Users/shahadsulaiman/Desktop/nlp_project/pdf/EC World_Tam Wee Jie_Sr Accounts Exec.pdf


### Drop the PATH column

In [35]:
# Remove the PATH column so that only the cleaned TEXT remains.
# Rebinding the name (rather than inplace=True) together with errors="ignore"
# keeps this cell idempotent: re-running it once PATH is already gone is a
# no-op instead of raising KeyError.
df_after = df_after.drop(columns="PATH", errors="ignore")

In [36]:
# Display the frame to confirm that only the TEXT column is left
# (pandas truncates the middle rows in the rendered output).
df_after

Unnamed: 0,TEXT
0,retnawati pasir ris retna objective obtain exposure chalenging oportunity establish organization require tea player self motivate enthusiastic able continuous positive contribution organization degre comerce wolongong australia finance atained distinctions acounting organisations principles comerce investment analysis finance significant simulation socialy inovative enterprise analyse cause efects busines decisions graduate distinction august diploma busines information technology nanyang polytechnics apr apr achiev overal cumulativ gpa final create social media platform java program langua busines service centre student club nanyang polytechnics bartley secondary apointed vice president ibrary club bartley secondary organise events outdor indor xperience truston management pte fund acountant asist reconciliation fund fund adminis trator client nvestor service prepar fund transfer instructions upload updat hedge fund database comunicat clients busines party resolve open ide ntify exceptions problems fecting acounting recor price comunicating management asisting resolution trade setlement broker calculate net value clients fund eview data iregularities fun activity price securities acordance aplicable valuation policy require payment fund expense aplicable system acord ance organization control cordinate resources ensure caried sla meet basi cash projection ensure suficient fund wekly newsleter investors come main contact person finance operations citco fund service operation analyst october trade cash reconciliation compile nav pack client monitor analyse price resolve discrepancies reconciliation items break betwen citco broker clients comunicated client broker cals resolve open junior hire partic ipated pre onth end counter otc master agrements confirmations liais departments within citco hoc data conectivity system contact person fun reconciliations analyse profit los fund nav pack suport conversion clients exist migrations suported busines units peak periods 
ibm sal internship reduce number non respond oportunities caled aproached every client solve problems help solve query data entries require coleague compet encies computers databases aplications paladyne mysql ace blomberg windows axi investor aexeo ofice word powerpoint excel ace speak writen english excelent bahasa indonesia excelent bahasa melayu excele teochew excelent mandarin basic nationality atributes notice period indonesian strong analytical leader player interpersonal omunication decision problem solvin presure systematic highly orient month
1,curiculam vitae hirok jyoti borah instrumentation enginering mail borahirokjyoti objective loking prestigious previous capabilities put eficient pose efective organiz ational ability supervise develop final output father prodip borah permanent aders vil bamun pukhuri pachauni gaon bamun pukhuri pin dist jorhat state asam birt religion hindu nationalities marit unmaried sex male language know asamese english hindi curent employe techno canada inc surface client oil designation daq perator swt equipments operate presure monitor temperature monitor writen field read shet field analysis colecting liquid sample data header analysis analysis salinity analysis reflect meter liquid flow rate dep stick api analysis instrumentation conection rig swt equipments rig swt equipments load load inventory swt equipments spare maintain progres previous employe dreser rand pvt client oil natural gas corporation nazira asam operation maintenance gas compresor designation instrumentation technician period calibration instrument like presure gaug presure transmiter presure switch pneumatic controler main tain repair logic control system gas compresor check stop type pump acording case emergency conection field instru ments haz ardous area control panel maintain progres aprenticeship aprenticeship oil corporation qualification examination grade pasing diploma instrumentation scte hrh prince wales enginering technology hslc seba bamun pukhuri hslc science ahsec strength optimistic energetic build liaison leadership enjoy responsibility like acept chalenges declaration declare statements aplication true corect jorhat regard hirok jyoti borah
2,gpa graduation george brown enginerin rice houston texas degre statistics psychology gpa graduation george brown enginering rice houston texas qualifications tec hnical python sql sas unix script excel ace pig latin tableau mapreduce coursework science data mine quantitative financial analytics multivariate analysis financial series stochastic risk management regr esion statistical compute qualifications sas certify programer sas large scale web graph procesing aws sumer hadop pig amazon analyze dataset web graph data bilion vertices compute degre histograms graph show stribut ion webpage linkages demonstrate conectivity web graph web link paterns page rank algorithm measure webp twets sentiment analysis movie recomendation sumer derive sentiment score real twets evaluate popular movies twets python twiter stream api evaluate similarity betwen two movies calculate corelation sentiment score produ ced recomendations similar movies stock performance earn visualization tableau sumer python analysis stock earn performances recent ipo technology pharmaceutical ene rgy entertainment financial industries create interactive dashboa demonstrate post ipo analysis industries file amount tableau direct comunication easy interpretation result ice heat oil futures ana lysis spring analysis excel trend seasonality forward curve heatin oil futures past months propose holistic evaluations phenomenon observe predict general trend heat oil futures evaluati market suporting market customer risk prediction nationwide insurance sumer busines analytics consultant market strategy initiate statistical mod eling combine psycholo gical theories sas asesed customer risk tolerance facilitate efective segmentation target customers acquire proficiency data retrieval teradata database sql ensure concurency coherence qual ity analysis depth understand database stru cture insurance products deliver methodology result executives create sesions explain methods tols coleagues aid promote ovative 
data analysis aproaches kagle data mine competition fal spring titanic survival prediction achieve acuracy identify survive pasengers construct prediction system sing feature scale manipulation gausian kernel svm algorithm integr ated close inspection specific trend data overules general algorithm computer vision construc ted clasification system ensemble kern svm logistic regresion random forest algorithms clasify image five categories achieve suces rate huricane damage rice spring frequency huricane ocurence damage result mix poison regresion methods predictions basis risk management windstorm insurance
3,cienega avenue los angeles california cloutie remix pati ramsey bortoli makeup artist cloutier remix patiramseybortoli celebrities aron fresh adam carola adrian brody amy adams antonio sabato apolo ohno bily blank bily ray cyrus blythe daner bob newhart bodhi elfman brian setzer oke burke bruce dern charlote martin cheryl burke chet adkins chris noth cody linley corey feldman daryl hanah denis miler derek luke dony osmond draw draw lachey dylan mcdermot edyta sliwinska eliot gould emit smith erin andrews franscico rod riquez gary dourdan george wendt hary hamlim holy madison ian ziering ice cube jack osbourne jery spinger joe pantoliano joey fatone karina smirnof kate goslin kely preston kely osbourne kery fisher kim kardashian kristi yamaguchi kym johnson lanc bas leza gibons mat blanc mathew mconaughey hamer nastasja kinski nick canon nicole egert nigel barker omar sharif peter frampton regina king robert wagner ron livingston suzane sumers tery chin tisha campbel waren sap man zac osen television dance sta season curent abc makeup celebrity circus ful season abc makeup dance war bruno carie ful season abc nitelife nicolodeon makeup bob newhart special lifetime makeup film face value michael miler makeup determination dea michael miler makeup mother testimony julian chojnacki makeup riperman phil sear makeup tough deadly david katz makeup leprechaun rodman flender makeup pol scot bagley makeup zipers clown palace dan golden makeup aint nothin without shop art david katz makeup cheyene warior mark grifith makeup blade warior katz asistant secrets lake suces frank fisher makeup intern cienega avenue los angeles california cloutie remix pati ramsey bortoli makeup artist comercials calaway golf dany levinson makeup odysey golf dany levinson makeup payles shoe dany levinson makeup wels fargo dany levinson makeup hamburger helper dany levinson makeup chicken helper dany levinson makeup tuna helper dany levinson makeup music vidoes jaguar ice cube grag makeup ice 
frogy frog grag makeup print empire sportswear catalog fit pregnancy glamour headshots style muscle catalog kaiser permanente confidential natural health ocean drive parent people readers digest robinson show beauty suit oscars golden globes emys sag amas emy tanding makeup dance star emy nomination outstanding makeup dance star
4,page ahendra penwal barechina almora mahendra singh penwal goal orient land survey operations management survey draw operations civil construction management schedule survey documentation technical data mainte nance flair adopt modern methodologies systems compliance quality standards ancho ring survey infrastructure activ ities distinguish abilities understand scope technical discusio advise selection equipment ensure compliance prescribe clients statutory regu lations deftnes schedule monitor respect cost resource deployment run quality compliance ensure timely execution asignments skiled design implement stringent systems quality manuals ensur quality standards stag adept inspect alyzing propose ensure caried specific standards prove expertise implement hse ensure meet quality standards specifications materials technical submitals methodology rect construction eficient organize teach long implementation standards skiled development apropriate methods along improvement exist ones distinguish abilities maintain iso standards sound latest sur vey instrument total station auto along techniques onshore oil gas exploration dgps operations road excavation blast methods rock deft colecting survey data highway survey onsite const ruction layout survey traverse survey crew supervision expertise check road gradient ogl colection alignment mark quantity check colection efective comunicator ability view larger picture skiled take busines critical decisions enact improvements align culture organization strategy vision mision objectives since mar gvk power infrastructures telangana surveyor apr shiv vani oil gas exploration service delhi surveyor mar enginers delhi surveyor aprentice abm enginering surveyor gurgaon traine surveyo deliverables enginering operations skil profile management survey construction operations page overal operations execute construction within cost norms organise metings evaluate progres botlenecking anchor construction ensure completion within cost 
parameters efective resource utilization maximize output survey structural draw operations maintain sketch map legal des criptions survey describe certify asume liability supervise data chart plot map survey kind survey build con struction infrastructure knowledgeable autocad ofice software land survey operations data relevant shape contour gravitation location elevation dimension land land feature near earth surface enginering mapmaking mine land evaluation construction purpose direct survey establish legal boundaries properties legal deds title write descriptions property boundary survey deds lease legal compute geodetic measurements interpret survey data determine shape elevat ions geom orphic topographic feature result survey shape contour location elevation dime nsions land land feature monitor overal operations execute construction within cost norms organize metings evaluate progr botlenecking oversee progres entail progres enginering procurement construction comisioning schedule guidelines joint survey client consult ant order achieve target carying buil survey total station dgps system transfer shi fting cordinates bench mark carying section cros section permanent structure come route line intain sketch map legal descriptions survey order describe certify asume liability academic diploma civil enginering karnataka state open mysore iti land survey govt iti almora utarakhand skil autocad auto loter gogle earth glober maper arc gi ofice word excel powerpoint leica survey ofice geomax survey ofice seismic birth english hindi kumauni please refer anexure execute anexure gvk power infrastructures traverse traverse adjustment bench mark shift topographical survey prop osed power house colecting ogl data border road road alignment revenue survey dam indo border bro road section section quantity calculation biling page supervision wapcos proje hep goriganga river supervision road excavation survey data entry curve fix ordinate establish bench mark traverse data dam 
calculatio survey conversant dgps system total station auto survey equipment survey verify cal pro file sumary topographical survey cisa airport mumbai modification layout leve ling survey runway structure dgps survey traverse shiv vani oil gas exploration service seismic data acquisition survey oil tank lump area seismic data acquisition survey shiv vani ongc jorhat basin khowai kalyanpur block adjoin bangladesh border west tripura enginers topographical survey propose power generation reliance power generation dadari railway line survey propose pasenger cariage speed track khurja railway station mathura railway station metro station layout survey isbt utam nagar dmrc delhi pier layout underground tunel layout underground utility survey dmrc rou survey dmrc direction dilshad garden noida sector length cng pipe line route survey dhoulakuan azadpur physical survey propose multi stories build plot rohini sector aprox acre fly lay survey molchand raja garden south extn ring road delhi lay dlf shop complex mal multi stories build gurgao lay dlf shop complex mal ulti stories build gurgao survey ntpc dadri pow mark bore hole location propose draw contour survey bsf shoting range campus indore survey dmrc isbt utam nagar isbt rohi delhi ier layout survey raja arden utam agar dmrc delhi water pipe line route survey rutland island man nicobar island wapcos andaman harbor ahw transmision line route survey chandigarh panchkula aprox transmision line route survey nhpc contra basis tawaghat pithoragarh aprox utarakhand abm enginering road alignment survey cpwd border road division indo pak border guj rat sector dlf sewerage gurgaon rvey widen jalandhar bhogpur stretch punjab rites survey stel asansol west bengal isco rites aprox area acre topographical survey prop osed mihan multi hub airport nagpur nagpur maharashtra state consultancy rambol aprox area acre topographical survey propose airport ship park frequency radar establish aval seabird karwar karnataka state area acre 
mahendra singh penwal
...,...
1529,tan xing rui mark markraytan deloite touche general audit intern audit fieldwork client sit stock take statutory audit require fieldwork asisted control mind map flow ensure apropriate control cast financial statements unite overseas uob trade operations centre troc import trade admin asistant contract procesed aplications clean invoice finance retirement acounts receipt due liaise external customers aplication setlement instructions dbs vickers securities equity setlements intern asisted setlement trade acounts contra acounts sort mail acount statements caried hoc duties administrative suport melbourne comerce double acounting finance polytechnic apr mar diploma financial service financial trade relevant certifications computer competent microsoft word powerpoint excel language fluent english mandarin relevant advance leadership workforce development agency wda september curicular mindef comisioned oficer arm force saf october serve capacity platon comander batal ion artilery asisted supervision sea game fun pack pack distribution polytechnic lifesaving society spls apr lifesaving competitions events certify lifeguard lifesaving society bronze medalion certify ability multitask manage adaptive situations interpersonal comunication
1530,tih investment management investment analyst tih close end equity fund exchange strategic partner parent hedge fund argyle stret management asm tih ben invest alongside asm special situation deal acros tih asm manage asets exces bilion fund milion vanila loan indonesian conglomerate sucesfuly close within wek execution negotiation term investment proposal extensively legal documentation fund milion structure loan indonesian conglomerate face short term liquidity squeze investment proposal investment return scenarios extensively legal documentation research analysis publicly trade bond result signi cant within manage fund research analysis close end fund sucesfuly result activist style investment generate aproximately absolute return month period author investment memo due diligence ntech startup thailand sed nancing round fund venture fund manage tih streamline back save man hours provident partner jakarta indonesia analyst provident investment management busines southeast analyst ben aspects source structure nancing optimise monitor investments culture highly entrepreneurial strong network particularly indonesia general finance investment analysis rst cut analysis often nancial deliver sucinct investment memos range busineses acros industries invest telecomunications infrastructure technology metal mine unconventional oil gas biomas trade restructure growth startup operations malaysia indonesia deploy digital advertise technologies facilitate infusion substantial money investors create inventory management link cash provident biomas busines encapsulate sales forecast inventory ows cash movements enable uper management beter alocate sales ciently manage valuation unconventional oil gas focus production eagle ford shale basin usa specialist determine typical type curve compare management estimate arps decline formula sensitivity analysis mezanine nancing return feasibility grade indonesian coper asx cash waterfal mechanisms enable discovery range tolerable 
payment kind pik cash margin identify cash swep amount repayment horizon valuation potential mvna virtual network agregator busines aim expand operations south east structure actively engage parent executives input busines development extensive primary secondary research payment landscape indonesia advise principals posible strategic directions acquire debt market finance transactions closely management investment lawyers two bond isuances achieve highest subscribers indonesian tower bersama infrastructure tbk tbig click linkedin recomendations page junguang tan cfa junguang tbig inaugural bond milion regs paper isued par tbig second bond milion regs paper isued par part tbig execute nancing debt programe facility agrements amount bilion syndication hedge counter party tbig public market capitalisation aproximately bilion foreign curency risk management analyse investe rate hedge strategy recomended aproach swap option structure meet hedge policy objectives achieve target cost monitor price diferent hedge instrument worth bilion notional value result competitive bid save treasury milions dolars apic technologies founder apic pioner wireles queue management system restaurants ticktok ticktok invaluable operational tol restaurants manage large crowd efectively without compromise service quality service expect sit milion diners end recently expand spearhead busines development achieve kpis ful vest spring fund tranches acquire close merchant partnerships pitch ticktok numerous venture fund strategic investors advance draft series subscription agrement ticktok acquire chope late undisclosed seven gure consideration deply aspects exit negotiation valuation acounting legal general asembly data science part mas michigan coursera python ace web data python data structure charter financial analyst cfa pased thre cfa exams atempt management magna cum laude dean double degres acountancy busines management finance chian scholarship smu agship ful undergraduate merit 
scholarship pro ciencies microsoft suite blomberg service python language capabilities native pro ciency english pro ciency mandarin ing hanban busines mandarin advance acreditation conversational bahasa indonesia click linkedin recomendations page
1531,wamala simon peter clif mail theglobeug hotmail clif val yahomail mob profile enthusiastic enjoy part sucesful productive quick grasp ideas concepts developi inovative creative solutions problems able initiative demonstrate motivation require meet tightest deadlines objective loking build technical within suitably chalenging role ken achieve development qualifications makerere iversity ordinary diploma telecomunications enginering uganda comunication technology nakawa radio television electronics technology prt nakawa vocational computer hardware network repair maintenances nakawa vocational uganda advance uganda cleaner production reneweable energy low cost productions uganda cleaner production unido demonstrable competence ind ependently atention ability wilingnes folow protocol enginering ethics span control staf apraisals depth analysis fault diagnose machine demonstrable competence industrial health safety environmental concepts fire fight computer literate administrative comunication skil maintenance supervisor sadolin paint uganda plot stret industrial area box kampala uganda duties management overse repair factory overse maintenance factory outsource part asign duties maintenance dairy wekly maintenance program overse machine operation overse health safety factory supervisor fire world uganda plot faraday bugolobi box kampala uganda duties overse supervise ordinate contractors source materials health safety instalations electrical efluent treatment supervisor phenix logistics uganda duties supervisor efluent treatment electrical section asist mulation implementation production execute instalations service maintenance repair machine function machine operate quality control package products enforce safety control measure ensure operational safety machine equipment safety staf initiate production improvement submit aproval aplying head generator electrical generaly mechanical units pre operational inspection generator system adjust protective relay 
circuit breakers meet system requirement necesary require form necesary paper timely maner require spond user inquire courteous maner information area asignment resolve complaints eficient timely maner estimate materials equipment require asigned requisition materials require operate facility emergencies acordance establish necesary atained power management electrical electronics repair maintenance transformers switch gear boiler operation trouble shoting boiler maintenance repair compresor maintenance repair meter power distribution basic mechanics basic weld water treatment take keping safety precautions protection hardwar technician world technologies duties repair maintenance computers trouble shoting computer instalations network repair software instalations reference coen oberholster enginering manager kansaiplascon buyungo charles manager phenix logistics semakula joseph manager core network uganda telecom
1532,rice iversity houst texas master science statistics cumulative gpa pected coursew ork advance statistical method financial series data analysis machine learn gre quan titati verbal linois rbana champaign urban linois bachel ience tural resource environ mental ciences gpa coursework aplied statistical methods natural resource economics gi natural resource management sit math write verbal experien rice houston fal quantitative financial analytics extensive research analysis financial statements available sec file devise compare selection strategies fundamental analysis historical data wrds strategy outperform benchmark percent cagr rice iversity houst sumer research asistant alyzed pulation census data sas excel poison gresio alyses evalence rate hepatitis respe age hnicity liver cancer inciden rate hypothesis heavy metal polution data gather houston ship chanel find city houston environmental council mini stry environmen satel lite vironment center beijin sumer gi data analy utilize arcgis excel analyze geographical data create individual map grind census data satelite image data past nature reserve maintain natural nature reserve database information improve acuracy eficiency sql excel asisted trans lation state nat ions cosystems chinese contain word champaign sumer campus mbasador overs campa ign result ver upload hero udy latform donation boks africa hero drive movement leverage viral market social media facilitate significant increase active user trafic initiate establish partnership campus organization profesors earn hero hon rol mon october outstanding performan highes number oks donate ils act ivities technic arcgis blomberg jmp matlab python sas sql language fluent mandarin glish swimin cokin video game poker


### Store the dataset in a pickle file

In [37]:
# Persist the cleaned frame under 'df_1_after_cleaning.pickle'.
# NOTE(review): write_pickle is defined elsewhere in this notebook; presumably it
# takes (filename, object) and pickles the object to that path -- confirm.
write_pickle('df_1_after_cleaning.pickle', df_after)