In [1]:
import pandas as pd

In [2]:
# Render floats with a thousands separator and two decimals in every displayed frame.
pd.set_option('display.float_format', '{:,.2f}'.format)

In [3]:
# Load the raw salary table; the path is relative to the notebook's working directory.
data = pd.read_csv(filepath_or_buffer='salaries_by_college_major.csv')

In [4]:
# Preview the first five rows of the raw frame.
data.head(n=5)

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group
0,Accounting,46000.0,77100.0,42200.0,152000.0,Business
1,Aerospace Engineering,57700.0,101000.0,64300.0,161000.0,STEM
2,Agriculture,42600.0,71900.0,36300.0,150000.0,Business
3,Anthropology,36800.0,61500.0,33800.0,138000.0,HASS
4,Architecture,41600.0,76800.0,50600.0,136000.0,Business


In [5]:
# (row count, column count) of the raw frame.
(len(data.index), len(data.columns))

(51, 6)

In [6]:
# Column labels of the raw frame (for a DataFrame, keys() is its column Index).
data.keys()

Index(['Undergraduate Major', 'Starting Median Salary',
       'Mid-Career Median Salary', 'Mid-Career 10th Percentile Salary',
       'Mid-Career 90th Percentile Salary', 'Group'],
      dtype='object')

In [7]:
# Boolean mask with the same shape as `data`: True wherever a value is missing.
pd.isna(data)

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group
0,False,False,False,False,False,False
1,False,False,False,False,False,False
2,False,False,False,False,False,False
3,False,False,False,False,False,False
4,False,False,False,False,False,False
5,False,False,False,False,False,False
6,False,False,False,False,False,False
7,False,False,False,False,False,False
8,False,False,False,False,False,False
9,False,False,False,False,False,False


In [8]:
# Inspect the last five rows of the raw frame.
data.tail(n=5)

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group
46,Psychology,35900.0,60400.0,31600.0,127000.0,HASS
47,Religion,34100.0,52000.0,29700.0,96400.0,HASS
48,Sociology,36500.0,58200.0,30700.0,118000.0,HASS
49,Spanish,34000.0,53100.0,31000.0,96400.0,HASS
50,Source: PayScale Inc.,,,,,


In [9]:
# Keep only fully populated rows: any row with at least one missing value is
# discarded. `data` itself is left untouched.
clean_data = data.dropna(axis='index', how='any')

In [10]:
# Check the last five rows of the cleaned frame.
clean_data.tail(n=5)

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group
45,Political Science,40800.0,78200.0,41200.0,168000.0,HASS
46,Psychology,35900.0,60400.0,31600.0,127000.0,HASS
47,Religion,34100.0,52000.0,29700.0,96400.0,HASS
48,Sociology,36500.0,58200.0,30700.0,118000.0,HASS
49,Spanish,34000.0,53100.0,31000.0,96400.0,HASS


In [11]:
# Show the starting-salary column as a one-column DataFrame (not a Series).
clean_data['Starting Median Salary'].to_frame()

Unnamed: 0,Starting Median Salary
0,46000.0
1,57700.0
2,42600.0
3,36800.0
4,41600.0
5,35800.0
6,38800.0
7,43000.0
8,63200.0
9,42600.0


In [12]:
# Largest starting median salary across all majors.
clean_data.loc[:, 'Starting Median Salary'].max()

74300.0

In [13]:
# Index label of the row that holds the largest starting median salary.
clean_data.loc[:, 'Starting Median Salary'].idxmax()

43

**Find Undergraduate Major with Highest Starting Salary**

In [14]:
# Index label of the major with the largest starting median salary.
highest_start_label = clean_data['Starting Median Salary'].idxmax()
# The label is wrapped in a list so .loc yields a one-row DataFrame, not a Series.
clean_data.loc[[highest_start_label]]

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group
43,Physician Assistant,74300.0,91700.0,66400.0,124000.0,STEM


**Find Undergraduate Major with Highest Mid-Career Median Salary**

In [15]:
# Index label of the major with the largest mid-career median salary.
highest_mid_label = clean_data['Mid-Career Median Salary'].idxmax()
# The label is wrapped in a list so .loc yields a one-row DataFrame, not a Series.
clean_data.loc[[highest_mid_label]]

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group
8,Chemical Engineering,63200.0,107000.0,71900.0,194000.0,STEM


**Find Undergraduate Major with Lowest Starting Salary**

In [16]:
# Index label of the major with the smallest starting median salary.
lowest_start_label = clean_data['Starting Median Salary'].idxmin()
# The label is wrapped in a list so .loc yields a one-row DataFrame, not a Series.
clean_data.loc[[lowest_start_label]]

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group
49,Spanish,34000.0,53100.0,31000.0,96400.0,HASS


**Find Undergraduate Major with Lowest Mid-Career Median Salary**

In [17]:
# Index label of the major with the smallest mid-career median salary.
lowest_mid_label = clean_data['Mid-Career Median Salary'].idxmin()
# The label is wrapped in a list so .loc yields a one-row DataFrame, not a Series.
clean_data.loc[[lowest_mid_label]]

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group
18,Education,34900.0,52000.0,29300.0,102000.0,HASS


**Earnings Difference (Mid-Career 90th Percentile Salary minus 10th Percentile Salary)**

In [18]:
# Spread between the top and bottom mid-career deciles, computed row by row.
earnings_difference = (
    clean_data['Mid-Career 90th Percentile Salary']
    - clean_data['Mid-Career 10th Percentile Salary']
)

In [19]:
# Attach the spread as the last column. `assign` builds a new frame and simply
# overwrites the column when it already exists, so this cell can be re-run
# safely; `DataFrame.insert` raises ValueError on a second run because the
# column name is already present.
clean_data = clean_data.assign(**{'Earnings Difference': earnings_difference})

In [20]:
# Confirm the new column is present by previewing the first five rows.
clean_data.head(n=5)

Unnamed: 0,Undergraduate Major,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Group,Earnings Difference
0,Accounting,46000.0,77100.0,42200.0,152000.0,Business,109800.0
1,Aerospace Engineering,57700.0,101000.0,64300.0,161000.0,STEM,96700.0
2,Agriculture,42600.0,71900.0,36300.0,150000.0,Business,113700.0
3,Anthropology,36800.0,61500.0,33800.0,138000.0,HASS,104200.0
4,Architecture,41600.0,76800.0,50600.0,136000.0,Business,85400.0


**Lowest Risk (Smallest Earnings Difference)**

In [21]:
# Order majors from the narrowest to the widest earnings spread.
low_risk = clean_data.sort_values(by='Earnings Difference', ascending=True)

In [22]:
# Five majors with the smallest spread, limited to the two relevant columns.
low_risk.loc[:, ['Undergraduate Major', 'Earnings Difference']].head(n=5)

Unnamed: 0,Undergraduate Major,Earnings Difference
40,Nursing,50700.0
43,Physician Assistant,57600.0
41,Nutrition,65300.0
49,Spanish,65400.0
27,Health Care Administration,66400.0


**Highest Risk (Largest Earnings Difference)**

In [23]:
# Order majors from the widest to the narrowest earnings spread.
high_risk = clean_data.sort_values(by='Earnings Difference', ascending=False)

In [24]:
# Five majors with the largest spread, limited to the two relevant columns.
high_risk.loc[:, ['Undergraduate Major', 'Earnings Difference']].head(n=5)

Unnamed: 0,Undergraduate Major,Earnings Difference
17,Economics,159400.0
22,Finance,147800.0
37,Math,137800.0
36,Marketing,132900.0
42,Philosophy,132500.0


**Majors with the Highest Potential (Highest Mid-Career 90th Percentile Salary)**

In [25]:
# Order majors by their mid-career 90th percentile salary, highest first.
high_value = clean_data.sort_values(
    by='Mid-Career 90th Percentile Salary',
    ascending=False,
)

In [26]:
# Five majors with the highest 90th percentile salary, limited to the two relevant columns.
high_value.loc[:, ['Undergraduate Major', 'Mid-Career 90th Percentile Salary']].head(n=5)

Unnamed: 0,Undergraduate Major,Mid-Career 90th Percentile Salary
17,Economics,210000.0
22,Finance,195000.0
8,Chemical Engineering,194000.0
37,Math,183000.0
44,Physics,178000.0


**Pivot by Group**

In [27]:
# Number of majors in each group: keep only the grouping key and the major
# name, then count the non-null major names per group.
clean_data[['Group', 'Undergraduate Major']].groupby('Group').count()

Unnamed: 0_level_0,Undergraduate Major
Group,Unnamed: 1_level_1
Business,12
HASS,22
STEM,16


In [28]:
# Per-group mean of every numeric column. `numeric_only=True` leaves out the
# text column 'Undergraduate Major'; it is passed by keyword so the flag's
# meaning is visible and does not depend on the parameter order of `mean`.
clean_data.groupby('Group').mean(numeric_only=True)

Unnamed: 0_level_0,Starting Median Salary,Mid-Career Median Salary,Mid-Career 10th Percentile Salary,Mid-Career 90th Percentile Salary,Earnings Difference
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Business,44633.33,75083.33,43566.67,147525.0,103958.33
HASS,37186.36,62968.18,34145.45,129363.64,95218.18
STEM,53862.5,90812.5,56025.0,157625.0,101600.0
