In [198]:
%matplotlib notebook

In [209]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
from functools import reduce

In [210]:
# Study data files
tech_div_2014 = "./tech_diversity/2014_tech_diversity.csv"
tech_div_2015 = "./tech_diversity/2015_tech_diversity.csv"
tech_div_2016 = "./tech_diversity/2016_tech_diversity.csv"
tech_div_2017 = "./tech_diversity/2017_tech_diversity.csv"
tech_div_2018 = "./tech_diversity/2018_tech_diversity.csv"

In [212]:
#Read Data Files and store into Pandas DataFrames
tech_div_2014 = pd.read_csv(tech_div_2014)
tech_div_2015 = pd.read_csv(tech_div_2015)
tech_div_2016 = pd.read_csv(tech_div_2016)
tech_div_2017 = pd.read_csv(tech_div_2017)
tech_div_2018 = pd.read_csv(tech_div_2018)

# Gender Merge and Export to CSV

In [216]:
# clean data, male/female, race columns
tech_2014_df = tech_div_2014.loc[:, ["company","female", "male","white","asian","latino","black","multi"]]
tech_2015_df = tech_div_2015.loc[:, ["company","female", "male","white","asian","latino","black","multi"]]
tech_2016_df = tech_div_2016.loc[:, ["company","female", "male","white","asian","latino","black","multi"]]
tech_2017_df = tech_div_2017.loc[:, ["company","female", "male","white","asian","latino","black","multi"]]
tech_2018_df = tech_div_2018.loc[:, ["company","female", "male","white","asian","latino","black","multi"]]

In [217]:
tech_2014_df.columns

Index(['company', 'female', 'male', 'white', 'asian', 'latino', 'black',
       'multi'],
      dtype='object')

In [218]:
#change column names to match the year
tech_2014_df.columns = ['company', 'female_2014', 'male_2014', 'white_2014','asian_2014','latino_2014','black_2014','multi_2014']
tech_2015_df.columns = ['company', 'female_2015', 'male_2015', 'white_2015','asian_2015','latino_2015','black_2015','multi_2015']
tech_2016_df.columns = ['company', 'female_2016', 'male_2016', 'white_2016','asian_2016','latino_2016','black_2016','multi_2016']
tech_2017_df.columns = ['company', 'female_2017', 'male_2017', 'white_2017','asian_2017','latino_2017','black_2017','multi_2017']
tech_2018_df.columns = ['company', 'female_2018', 'male_2018', 'white_2018','asian_2018','latino_2018','black_2018','multi_2018']


In [219]:
# Merge dataframes using an outer join
tech_div_merge = reduce(lambda  left,right: pd.merge(left,right,on=['company'],
                                            how='outer'), data_frames)

In [220]:
#tech_div_merge.set_index('company')

In [221]:
#tech_div_merge

In [222]:
#drop any rows with all NA
tech_div_merge = tech_div_merge.dropna(how = 'all') 


In [223]:
#drop rows not needed, all non-tech companies 
tech_div_merge_drop = tech_div_merge.drop([0,26,27,28,29,30,31])

In [224]:
#GENDER & RACE: Pull 2018 columns only (used for interactive dashboard and to create JSON)
tech_div_dict=tech_div_merge_drop[['company', 'female_2018', 'male_2018', 'white_2018',
                             'asian_2018','latino_2018','black_2018','multi_2018']]
grouped_tech_div = tech_div_dict.groupby(by=['company']).mean()
transposed = grouped_tech_div.transpose()
transposed_dict = transposed.to_dict()
transposed_dict

{'AirBnB ': {'female_2018': 41.0,
  'male_2018': 59.0,
  'white_2018': 50.0,
  'asian_2018': 36.0,
  'latino_2018': 7.0,
  'black_2018': 3.0,
  'multi_2018': 3.0},
 'Amazon': {'female_2018': 39.0,
  'male_2018': 61.0,
  'white_2018': 48.0,
  'asian_2018': 13.0,
  'latino_2018': 13.0,
  'black_2018': 21.0,
  'multi_2018': 0.0},
 'Apple': {'female_2018': 32.0,
  'male_2018': 68.0,
  'white_2018': 54.0,
  'asian_2018': 21.0,
  'latino_2018': 13.0,
  'black_2018': 9.0,
  'multi_2018': 3.0},
 'Cisco': {'female_2018': 24.0,
  'male_2018': 76.0,
  'white_2018': 53.0,
  'asian_2018': 37.0,
  'latino_2018': 5.0,
  'black_2018': 4.0,
  'multi_2018': 1.0},
 'Dell': {'female_2018': 28.0,
  'male_2018': 72.0,
  'white_2018': 69.0,
  'asian_2018': 9.0,
  'latino_2018': 11.0,
  'black_2018': 10.0,
  'multi_2018': 0.0},
 'Etsy ': {'female_2018': 54.0,
  'male_2018': 46.0,
  'white_2018': 79.0,
  'asian_2018': 10.0,
  'latino_2018': 4.0,
  'black_2018': 3.0,
  'multi_2018': 4.0},
 'Facebook': {'female_

In [225]:
#GENDER & RACE 2018 ONLY into JSON
import json
tech_div_json = json.dumps(transposed_dict, indent = 4)
print(tech_div_json)

{
    "AirBnB ": {
        "female_2018": 41.0,
        "male_2018": 59.0,
        "white_2018": 50.0,
        "asian_2018": 36.0,
        "latino_2018": 7.0,
        "black_2018": 3.0,
        "multi_2018": 3.0
    },
    "Amazon": {
        "female_2018": 39.0,
        "male_2018": 61.0,
        "white_2018": 48.0,
        "asian_2018": 13.0,
        "latino_2018": 13.0,
        "black_2018": 21.0,
        "multi_2018": 0.0
    },
    "Apple": {
        "female_2018": 32.0,
        "male_2018": 68.0,
        "white_2018": 54.0,
        "asian_2018": 21.0,
        "latino_2018": 13.0,
        "black_2018": 9.0,
        "multi_2018": 3.0
    },
    "Cisco": {
        "female_2018": 24.0,
        "male_2018": 76.0,
        "white_2018": 53.0,
        "asian_2018": 37.0,
        "latino_2018": 5.0,
        "black_2018": 4.0,
        "multi_2018": 1.0
    },
    "Dell": {
        "female_2018": 28.0,
        "male_2018": 72.0,
        "white_2018": 69.0,
        "asian_2018": 9.0,
       

In [228]:
#2018 Race ONLY into JSON
#Pull 2018 columns only (used for interactive dashboard and to create JSON)
tech_div_dict=tech_div_merge_drop[['company', 'white_2018','asian_2018','latino_2018','black_2018','multi_2018']]
grouped_tech_div = tech_div_dict.groupby(by=['company']).mean()
transposed = grouped_tech_div.transpose()
transposed_dict = transposed.to_dict()
transposed_dict

{'AirBnB ': {'white_2018': 50.0,
  'asian_2018': 36.0,
  'latino_2018': 7.0,
  'black_2018': 3.0,
  'multi_2018': 3.0},
 'Amazon': {'white_2018': 48.0,
  'asian_2018': 13.0,
  'latino_2018': 13.0,
  'black_2018': 21.0,
  'multi_2018': 0.0},
 'Apple': {'white_2018': 54.0,
  'asian_2018': 21.0,
  'latino_2018': 13.0,
  'black_2018': 9.0,
  'multi_2018': 3.0},
 'Cisco': {'white_2018': 53.0,
  'asian_2018': 37.0,
  'latino_2018': 5.0,
  'black_2018': 4.0,
  'multi_2018': 1.0},
 'Dell': {'white_2018': 69.0,
  'asian_2018': 9.0,
  'latino_2018': 11.0,
  'black_2018': 10.0,
  'multi_2018': 0.0},
 'Etsy ': {'white_2018': 79.0,
  'asian_2018': 10.0,
  'latino_2018': 4.0,
  'black_2018': 3.0,
  'multi_2018': 4.0},
 'Facebook': {'white_2018': 49.0,
  'asian_2018': 40.0,
  'latino_2018': 5.0,
  'black_2018': 3.0,
  'multi_2018': 3.0},
 'Flickr': {'white_2018': 45.0,
  'asian_2018': 44.0,
  'latino_2018': 4.0,
  'black_2018': 2.0,
  'multi_2018': 2.0},
 'Google': {'white_2018': 53.0,
  'asian_2018'

In [229]:
#RACE ONLY 2018 ONLY into JSON
import json
tech_div_json = json.dumps(transposed_dict, indent = 4)
print(tech_div_json)

{
    "AirBnB ": {
        "white_2018": 50.0,
        "asian_2018": 36.0,
        "latino_2018": 7.0,
        "black_2018": 3.0,
        "multi_2018": 3.0
    },
    "Amazon": {
        "white_2018": 48.0,
        "asian_2018": 13.0,
        "latino_2018": 13.0,
        "black_2018": 21.0,
        "multi_2018": 0.0
    },
    "Apple": {
        "white_2018": 54.0,
        "asian_2018": 21.0,
        "latino_2018": 13.0,
        "black_2018": 9.0,
        "multi_2018": 3.0
    },
    "Cisco": {
        "white_2018": 53.0,
        "asian_2018": 37.0,
        "latino_2018": 5.0,
        "black_2018": 4.0,
        "multi_2018": 1.0
    },
    "Dell": {
        "white_2018": 69.0,
        "asian_2018": 9.0,
        "latino_2018": 11.0,
        "black_2018": 10.0,
        "multi_2018": 0.0
    },
    "Etsy ": {
        "white_2018": 79.0,
        "asian_2018": 10.0,
        "latino_2018": 4.0,
        "black_2018": 3.0,
        "multi_2018": 4.0
    },
    "Facebook": {
        "white_2018"

In [226]:
# Creating a JSON with 2014-2018 data (used for timeline bar chart)

tech_dict_all_years=tech_div_merge_drop[['company', 'female_2014', 'male_2014', 'white_2014',
                             'asian_2014','latino_2014','black_2014','multi_2014', 'female_2015', 'male_2015', 'white_2015',
                             'asian_2015','latino_2015','black_2015','multi_2015', 'female_2016', 'male_2016', 'white_2016',
                             'asian_2016','latino_2016','black_2016','multi_2016', 'female_2017', 'male_2017', 'white_2017',
                             'asian_2017','latino_2017','black_2017','multi_2017', 'female_2018', 'male_2018', 'white_2018',
                             'asian_2018','latino_2018','black_2018','multi_2018']]
grouped_tech_div_all_years = tech_dict_all_years.groupby(by=['company']).mean()
transposed = grouped_tech_div_all_years.transpose()
transposed_dict_all_years = transposed.to_dict()
transposed_dict_all_years

{'AirBnB ': {'female_2014': nan,
  'male_2014': nan,
  'white_2014': nan,
  'asian_2014': nan,
  'latino_2014': nan,
  'black_2014': nan,
  'multi_2014': nan,
  'female_2015': nan,
  'male_2015': nan,
  'white_2015': nan,
  'asian_2015': nan,
  'latino_2015': nan,
  'black_2015': nan,
  'multi_2015': nan,
  'female_2016': nan,
  'male_2016': nan,
  'white_2016': nan,
  'asian_2016': nan,
  'latino_2016': nan,
  'black_2016': nan,
  'multi_2016': nan,
  'female_2017': nan,
  'male_2017': nan,
  'white_2017': nan,
  'asian_2017': nan,
  'latino_2017': nan,
  'black_2017': nan,
  'multi_2017': nan,
  'female_2018': 41.0,
  'male_2018': 59.0,
  'white_2018': 50.0,
  'asian_2018': 36.0,
  'latino_2018': 7.0,
  'black_2018': 3.0,
  'multi_2018': 3.0},
 'Amazon': {'female_2014': 37.0,
  'male_2014': 63.0,
  'white_2014': 60.0,
  'asian_2014': 13.0,
  'latino_2014': 9.0,
  'black_2014': 15.0,
  'multi_2014': 0.0,
  'female_2015': 37.0,
  'male_2015': 63.0,
  'white_2015': 60.0,
  'asian_2015':

In [227]:
import json
tech_div_all_years_json = json.dumps(transposed_dict_all_years, indent = 4)
print(tech_div_all_years_json)

{
    "AirBnB ": {
        "female_2014": NaN,
        "male_2014": NaN,
        "white_2014": NaN,
        "asian_2014": NaN,
        "latino_2014": NaN,
        "black_2014": NaN,
        "multi_2014": NaN,
        "female_2015": NaN,
        "male_2015": NaN,
        "white_2015": NaN,
        "asian_2015": NaN,
        "latino_2015": NaN,
        "black_2015": NaN,
        "multi_2015": NaN,
        "female_2016": NaN,
        "male_2016": NaN,
        "white_2016": NaN,
        "asian_2016": NaN,
        "latino_2016": NaN,
        "black_2016": NaN,
        "multi_2016": NaN,
        "female_2017": NaN,
        "male_2017": NaN,
        "white_2017": NaN,
        "asian_2017": NaN,
        "latino_2017": NaN,
        "black_2017": NaN,
        "multi_2017": NaN,
        "female_2018": 41.0,
        "male_2018": 59.0,
        "white_2018": 50.0,
        "asian_2018": 36.0,
        "latino_2018": 7.0,
        "black_2018": 3.0,
        "multi_2018": 3.0
    },
    "Amazon": {
     