# Recoding Data in Python

## Import Packages

In [3]:
import pandas as pd

In [4]:
glassdoor = pd.read_csv('../../Data/glassdoor.csv')
glassdoor.head()

Unnamed: 0,Job_title,Company,State,City,Min_Salary,Max_Salary,Job_Desc,Industry,Rating,Date_Posted,Valid_until,Job_Type
0,Chief Marketing Officer (CMO),National Debt Relief,NY,New York,-1,-1,Who We're Looking For:\n\nThe Chief Marketing ...,Finance,4.0,5/8/2020,6/7/2020,FULL_TIME
1,Registered Nurse,Queens Boulevard Endoscopy Center,NY,Rego Park,-1,-1,"Queens Boulevard Endoscopy Center, an endoscop...",,3.0,4/25/2020,6/7/2020,FULL_TIME
2,Dental Hygienist,Batista Dental,NJ,West New York,-1,-1,Part-time or Full-timedental hygienist positio...,,,5/2/2020,6/7/2020,PART_TIME
3,Senior Salesforce Developer,National Debt Relief,NY,New York,44587,82162,Principle Duties & Responsibilities:\n\nAnalyz...,Finance,4.0,5/8/2020,6/7/2020,FULL_TIME
4,"DEPUTY EXECUTIVE DIRECTOR, PROGRAM AND LEGAL A...",National Advocates for Pregnant Women,NY,New York,125410,212901,"For FULL Job Announcement, visit our website: ...",,,4/28/2020,6/7/2020,FULL_TIME


## Recoding State into a New Column

In [5]:
glassdoor.State.value_counts()

NY    750
NJ    150
Name: State, dtype: int64

In [6]:
def state (series):
    if series=="NY":
        return 0
    if series == "NJ":
        return 1
glassdoor['StateR'] = glassdoor['State'].apply(state)

In [7]:
glassdoor.head()

Unnamed: 0,Job_title,Company,State,City,Min_Salary,Max_Salary,Job_Desc,Industry,Rating,Date_Posted,Valid_until,Job_Type,StateR
0,Chief Marketing Officer (CMO),National Debt Relief,NY,New York,-1,-1,Who We're Looking For:\n\nThe Chief Marketing ...,Finance,4.0,5/8/2020,6/7/2020,FULL_TIME,0
1,Registered Nurse,Queens Boulevard Endoscopy Center,NY,Rego Park,-1,-1,"Queens Boulevard Endoscopy Center, an endoscop...",,3.0,4/25/2020,6/7/2020,FULL_TIME,0
2,Dental Hygienist,Batista Dental,NJ,West New York,-1,-1,Part-time or Full-timedental hygienist positio...,,,5/2/2020,6/7/2020,PART_TIME,1
3,Senior Salesforce Developer,National Debt Relief,NY,New York,44587,82162,Principle Duties & Responsibilities:\n\nAnalyz...,Finance,4.0,5/8/2020,6/7/2020,FULL_TIME,0
4,"DEPUTY EXECUTIVE DIRECTOR, PROGRAM AND LEGAL A...",National Advocates for Pregnant Women,NY,New York,125410,212901,"For FULL Job Announcement, visit our website: ...",,,4/28/2020,6/7/2020,FULL_TIME,0


## Recoding City into the Same Variable

In [8]:
glassdoor.City.value_counts()

New York          420
Rego Park          60
Brooklyn           60
Staten Island      60
West New York      30
Williston Park     30
Maspeth            30
Paramus            30
West Orange        30
Lynbrook           30
Fort Lee           30
Jersey City        30
Bronx              30
Mamaroneck         30
Name: City, dtype: int64

In [9]:
cleanup = {"City" : {"New York": 0, "Rego Park": 1, "Staten Island" : 2, "Brooklyn" : 3, "Mamroneck" : 4, "Lynbrook" : 5, "West New York": 6, "Fort Lee": 7, "Williston Park": 8, "Maspeth": 9, "Jersey City": 10, "West Orange": 11, "Bronx": 12, "Paramus": 13}}
glassdoor.replace(cleanup, inplace=True)

In [10]:
glassdoor.head()

Unnamed: 0,Job_title,Company,State,City,Min_Salary,Max_Salary,Job_Desc,Industry,Rating,Date_Posted,Valid_until,Job_Type,StateR
0,Chief Marketing Officer (CMO),National Debt Relief,NY,0,-1,-1,Who We're Looking For:\n\nThe Chief Marketing ...,Finance,4.0,5/8/2020,6/7/2020,FULL_TIME,0
1,Registered Nurse,Queens Boulevard Endoscopy Center,NY,1,-1,-1,"Queens Boulevard Endoscopy Center, an endoscop...",,3.0,4/25/2020,6/7/2020,FULL_TIME,0
2,Dental Hygienist,Batista Dental,NJ,6,-1,-1,Part-time or Full-timedental hygienist positio...,,,5/2/2020,6/7/2020,PART_TIME,1
3,Senior Salesforce Developer,National Debt Relief,NY,0,44587,82162,Principle Duties & Responsibilities:\n\nAnalyz...,Finance,4.0,5/8/2020,6/7/2020,FULL_TIME,0
4,"DEPUTY EXECUTIVE DIRECTOR, PROGRAM AND LEGAL A...",National Advocates for Pregnant Women,NY,0,125410,212901,"For FULL Job Announcement, visit our website: ...",,,4/28/2020,6/7/2020,FULL_TIME,0


## Dummy Coding the Industry

In [11]:
IndustryDummy = pd.get_dummies(glassdoor['Industry'],drop_first=True)

In [12]:
IndustryDummy.head()

Unnamed: 0,Business Services,"Construction, Repair & Maintenance",Finance,Health Care,Information Technology,Retail,Telecommunications
0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0
3,0,0,1,0,0,0,0
4,0,0,0,0,0,0,0


In [13]:
glassdoor1 = pd.concat([glassdoor, IndustryDummy],axis=1)

In [14]:
glassdoor1.head()

Unnamed: 0,Job_title,Company,State,City,Min_Salary,Max_Salary,Job_Desc,Industry,Rating,Date_Posted,Valid_until,Job_Type,StateR,Business Services,"Construction, Repair & Maintenance",Finance,Health Care,Information Technology,Retail,Telecommunications
0,Chief Marketing Officer (CMO),National Debt Relief,NY,0,-1,-1,Who We're Looking For:\n\nThe Chief Marketing ...,Finance,4.0,5/8/2020,6/7/2020,FULL_TIME,0,0,0,1,0,0,0,0
1,Registered Nurse,Queens Boulevard Endoscopy Center,NY,1,-1,-1,"Queens Boulevard Endoscopy Center, an endoscop...",,3.0,4/25/2020,6/7/2020,FULL_TIME,0,0,0,0,0,0,0,0
2,Dental Hygienist,Batista Dental,NJ,6,-1,-1,Part-time or Full-timedental hygienist positio...,,,5/2/2020,6/7/2020,PART_TIME,1,0,0,0,0,0,0,0
3,Senior Salesforce Developer,National Debt Relief,NY,0,44587,82162,Principle Duties & Responsibilities:\n\nAnalyz...,Finance,4.0,5/8/2020,6/7/2020,FULL_TIME,0,0,0,1,0,0,0,0
4,"DEPUTY EXECUTIVE DIRECTOR, PROGRAM AND LEGAL A...",National Advocates for Pregnant Women,NY,0,125410,212901,"For FULL Job Announcement, visit our website: ...",,,4/28/2020,6/7/2020,FULL_TIME,0,0,0,0,0,0,0,0


## Continuous to Categorical with the Max Salary Variable

In [15]:
def salary (series): 
    if series < 30000:
        return 0
    if series >= 60000: 
        return 1

glassdoor1['Max_SalaryR'] = glassdoor1['Max_Salary'].apply(salary)

In [16]:
glassdoor1.head()

Unnamed: 0,Job_title,Company,State,City,Min_Salary,Max_Salary,Job_Desc,Industry,Rating,Date_Posted,...,Job_Type,StateR,Business Services,"Construction, Repair & Maintenance",Finance,Health Care,Information Technology,Retail,Telecommunications,Max_SalaryR
0,Chief Marketing Officer (CMO),National Debt Relief,NY,0,-1,-1,Who We're Looking For:\n\nThe Chief Marketing ...,Finance,4.0,5/8/2020,...,FULL_TIME,0,0,0,1,0,0,0,0,0.0
1,Registered Nurse,Queens Boulevard Endoscopy Center,NY,1,-1,-1,"Queens Boulevard Endoscopy Center, an endoscop...",,3.0,4/25/2020,...,FULL_TIME,0,0,0,0,0,0,0,0,0.0
2,Dental Hygienist,Batista Dental,NJ,6,-1,-1,Part-time or Full-timedental hygienist positio...,,,5/2/2020,...,PART_TIME,1,0,0,0,0,0,0,0,0.0
3,Senior Salesforce Developer,National Debt Relief,NY,0,44587,82162,Principle Duties & Responsibilities:\n\nAnalyz...,Finance,4.0,5/8/2020,...,FULL_TIME,0,0,0,1,0,0,0,0,1.0
4,"DEPUTY EXECUTIVE DIRECTOR, PROGRAM AND LEGAL A...",National Advocates for Pregnant Women,NY,0,125410,212901,"For FULL Job Announcement, visit our website: ...",,,4/28/2020,...,FULL_TIME,0,0,0,0,0,0,0,0,1.0


In [17]:
glassdoor1.Max_SalaryR.value_counts()

0.0    445
1.0    335
Name: Max_SalaryR, dtype: int64