In [14]:
# Dependencies
import pandas as pd
import csv

In [15]:
# load CSV
file = "Resources/2016-FCC-New-Coders-Survey-Data.csv"

In [16]:
# Read with pandas--low_memory required to suppress errors about mixed data types
new_coders = pd.read_csv(file, encoding='iso-8859-1', low_memory=False)
new_coders.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampMonthsAgo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,...,ResourceSoloLearn,ResourceStackOverflow,ResourceTreehouse,ResourceUdacity,ResourceUdemy,ResourceW3Schools,ResourceYouTube,SchoolDegree,SchoolMajor,StudentDebtOwe
0,28.0,0.0,,,,,,,,,...,,,,,,,,"some college credit, no degree",,20000.0
1,22.0,0.0,,,,,,,,,...,,,,,1.0,,,"some college credit, no degree",,
2,19.0,0.0,,,,,,,,,...,,,,,,,,high school diploma or equivalent (GED),,
3,26.0,0.0,,,,,,,,,...,,,,,,,,bachelor's degree,Cinematography And Film,7000.0
4,20.0,0.0,,,,,,,,,...,,,,,,,,"some college credit, no degree",,


In [17]:
# Take only columns 0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111
new_coders = new_coders.iloc[:,[0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111]]
new_coders.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,...,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,office and administrative support,Employed for wages,male,0.0,30.0,32000.0,,"some college credit, no degree",
1,22.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,food and beverage,Employed for wages,male,,30.0,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,0.0,,,,,,,,more than 1 million,...,United States of America,finance,Employed for wages,male,,20.0,48000.0,,high school diploma or equivalent (GED),
3,26.0,0.0,,,,,,,,more than 1 million,...,United States of America,"arts, entertainment, sports, or media",Employed for wages,female,,20.0,43000.0,Front-End Web Developer,bachelor's degree,Cinematography And Film
4,20.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,education,Employed for wages,female,,25.0,6000.0,Full-Stack Web Developer,"some college credit, no degree",


In [18]:
# Change reading 0 and 1 to No and Yes, respectively
reduced_coders = new_coders.replace({0.0:"No", 1.0:"Yes"})
reduced_coders.head() 

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,...,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,No,,,,,,,,"between 100,000 and 1 million",...,United States of America,office and administrative support,Employed for wages,male,No,30,32000.0,,"some college credit, no degree",
1,22.0,No,,,,,,,,"between 100,000 and 1 million",...,United States of America,food and beverage,Employed for wages,male,,30,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,No,,,,,,,,more than 1 million,...,United States of America,finance,Employed for wages,male,,20,48000.0,,high school diploma or equivalent (GED),
3,26.0,No,,,,,,,,more than 1 million,...,United States of America,"arts, entertainment, sports, or media",Employed for wages,female,,20,43000.0,Front-End Web Developer,bachelor's degree,Cinematography And Film
4,20.0,No,,,,,,,,"between 100,000 and 1 million",...,United States of America,education,Employed for wages,female,,25,6000.0,Full-Stack Web Developer,"some college credit, no degree",


In [19]:
# Extract rows for only those who attended a bootcamp
attended_bootcamp = reduced_coders.loc[reduced_coders["AttendedBootcamp"]=="Yes"]
attended_bootcamp.count()

Age                     818
AttendedBootcamp        953
BootcampFinish          933
BootcampFullJobAfter    635
BootcampLoanYesNo       934
BootcampName            895
BootcampPostSalary      330
BootcampRecommend       937
ChildrenNumber          148
CityPopulation          823
CodeEventBootcamp        27
CountryLive             825
EmploymentField         476
EmploymentStatus        792
Gender                  833
HasChildren             211
HoursLearning           925
Income                  430
JobRoleInterest         402
SchoolDegree            832
SchoolMajor             622
dtype: int64

In [20]:
# Create DataFrame of the different boot camps that had a significant number of attendees
bootcamp_name = pd.DataFrame(reduced_coders["BootcampName"].value_counts())
bootcamp_name.reset_index(inplace=True)
bootcamp_name.columns = ["BootcampName", "Count"]
bootcamp_name = bootcamp_name.loc[(bootcamp_name["Count"])>9]
bootcamp_name.head()

Unnamed: 0,BootcampName,Count
0,General Assembly,90
1,Flatiron School,54
2,Dev Bootcamp,48
3,The Iron Yard,40
4,Prime Digital Academy,30


In [21]:
# Count how many attendees of each bootcamp would recommend the bootcam
recommend_bootcamp = attended_bootcamp.replace({"Yes":1, "No":0})
recommend_bootcamp = pd.DataFrame(recommend_bootcamp.groupby("BootcampName")["BootcampRecommend"].sum())
recommend_bootcamp.reset_index(inplace=True)
recommend_bootcamp.columns=["BootcampName", "Amount who do Recommend"]
recommend_bootcamp.head()

Unnamed: 0,BootcampName,Amount who do Recommend
0,10x.org.il,3.0
1,4Geeks Academy,4.0
2,AcadGild,2.0
3,Academia de CÃ³digo,4.0
4,Academic Work Academy,0.0


In [22]:
# Merge the two created data frames on the name of tbe bootcamp
merged_bootcamp = pd.merge(recommend_bootcamp, bootcamp_name, on="BootcampName", how="outer")
merged_bootcamp= merged_bootcamp.dropna(how='any')
merged_bootcamp.head()

Unnamed: 0,BootcampName,Amount who do Recommend,Count
10,App Academy,20.0,22.0
19,Bloc.io,11.0,21.0
28,Code Fellows,15.0,21.0
39,Coder Camps,6.0,11.0
44,Codeup,6.0,10.0


In [23]:
# Calculate percentage of eac bootcamp's students who are recommenders
percent_recommended= (merged_bootcamp["Amount who do Recommend"] / merged_bootcamp["Count"]) * 100
merged_bootcamp["Percent Recommended"] = percent_recommended
merged_bootcamp.head()

Unnamed: 0,BootcampName,Amount who do Recommend,Count,Percent Recommended
10,App Academy,20.0,22.0,90.909091
19,Bloc.io,11.0,21.0,52.380952
28,Code Fellows,15.0,21.0,71.428571
39,Coder Camps,6.0,11.0,54.545455
44,Codeup,6.0,10.0,60.0


In [24]:
# Reformat the column values
merged_bootcamp["Amount who do Recommend"] = merged_bootcamp["Amount who do Recommend"].map("{:}".format)
merged_bootcamp["Percent Recommended"] = merged_bootcamp["Percent Recommended"].map("{:.2f}%".format)
merged_bootcamp.head()

Unnamed: 0,BootcampName,Amount who do Recommend,Count,Percent Recommended
10,App Academy,20.0,22.0,90.91%
19,Bloc.io,11.0,21.0,52.38%
28,Code Fellows,15.0,21.0,71.43%
39,Coder Camps,6.0,11.0,54.55%
44,Codeup,6.0,10.0,60.00%


In [25]:
# Sort the merged dataframe
merged_bootcamp = merged_bootcamp.sort_values("Percent Recommended", ascending=False)
merged_bootcamp.head(20)

Unnamed: 0,BootcampName,Amount who do Recommend,Count,Percent Recommended
117,Turing,26.0,27.0,96.30%
64,Free Code Camp is not a bootcamp - please scro...,14.0,15.0,93.33%
70,Hack Reactor,27.0,29.0,93.10%
62,Flatiron School,50.0,54.0,92.59%
10,App Academy,20.0,22.0,90.91%
84,MakerSquare,18.0,20.0,90.00%
71,Hackbright Academy,19.0,22.0,86.36%
52,Dev Bootcamp,41.0,48.0,85.42%
97,Prime Digital Academy,25.0,30.0,83.33%
114,The Firehose Project,13.0,16.0,81.25%


In [26]:
# Export to excel and remove index
merged_bootcamp.to_excel("output/Bootcamp_Output_Part2_WJones.xlsx", index=False)