
<h1 id='part1'>Data Science Blogpost :  Developer's Insight</h1>

In this project we explore three questions about developers from all over the world:

1. Which type of Developers are highest in numbers?
2. Which type of Developers "code as hobby" or "contribute to open source"?
3. Which type of Developers use which type of IDEs the most?

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import TotalCount as t
%matplotlib inline

# Load the survey responses and the survey schema from CSV files located in
# the notebook's working directory.
df = pd.read_csv('./survey_results_public.csv')
# The schema is queried further down through its 'Column' and 'Question' columns.
schema = pd.read_csv('./survey_results_schema.csv')
# Last expression of the cell: display the first rows of the responses.
df.head()

In [None]:
# Look up the survey question that belongs to a column name
def get_description(column_name, schema=schema):
    """
    Return the question text recorded in the schema for one survey column.

    INPUT - column_name - string - name of the column you would like to know about
            schema - pandas dataframe with the schema of the developers survey;
                     it is read through its 'Column' and 'Question' columns

    OUTPUT - the 'Question' entry of the first schema row whose 'Column'
             equals column_name (IndexError if no row matches)
    """
    is_requested = schema['Column'] == column_name
    questions = schema.loc[is_requested, 'Question'].tolist()
    return questions[0]

In [None]:
# Display the question text stored in the schema for the DeveloperType column
get_description("DeveloperType")

<h2>Part I: Which type of Developers are highest in numbers?</h2>

In [None]:
# List of all the developer types to tally.
# These strings are compared against the text of the DeveloperType column
# (count_plot_ide does a substring match with them), so each spelling has to
# agree with the data exactly; a misspelled entry silently tallies as zero.
possible_vals = [
    'Web developer',
    'Desktop applications developer',
    'Mobile developer',
    'DevOps specialist',
    'Embedded applications/devices developer',  # was misspelled 'Embeded'
    'Database administrator',
    'Developer with a statistics or mathematics background',
    'Systems administrator',
    'Data scientist',
    'Graphic designer',
    'Quality assurance engineer',
    'Machine learning specialist',
    'Graphics programming',
]

In [None]:
# Function to count the developer types and plot graph
def count_plot(df,title='Types of Developer',plot=True):
    """
    Tally the DeveloperType column and optionally plot each type's share.

    INPUT - df - Dataframe holding a DeveloperType column
          - title - string - Title of the plot
          - plot - bool - whether or not to draw the bar chart
    OUTPUT - props_dev_df - the tallies returned by t.total_count, indexed by
             'method' and divided by their column sum (proportions, not raw counts)
           Displays a bar chart of those proportions when plot is True.
    """
    # Name the columns explicitly instead of renaming whatever reset_index()
    # produces: value_counts().reset_index() yields ('index', 'DeveloperType')
    # before pandas 2.0 but ('DeveloperType', 'count') from 2.0 on, where the
    # former rename mapping left two 'count' columns and no 'method' column.
    dev_df = (
        df['DeveloperType']
        .value_counts()
        .rename_axis('method')
        .reset_index(name='count')
    )
    # Aggregate the raw answer strings per entry of the notebook-level
    # possible_vals list (total_count comes from the local TotalCount module).
    dev_df = t.total_count(dev_df,'method','count',possible_vals)
    dev_df = dev_df.set_index('method')

    # Computed once and used for both the chart and the return value.
    props_dev_df = dev_df/dev_df.sum()

    if plot:
        props_dev_df.plot(kind='bar',legend=None);
        plt.title(title);
        plt.show();
    return props_dev_df


    

In [None]:
# Get graph for Part I: draws the bar chart (plot defaults to True) and keeps
# the returned proportions in props_df.
props_df = count_plot(df)

<h2>Part II: Which type of Developers "code as hobby" or "contribute to open source"?</h2>

In [None]:
# Flag answers that report hobby programming and/or open source contribution
def program_hobby(hobby_str):
    '''
    Encode one ProgramHobby answer as a 0/1 indicator.

    INPUT
        hobby_str - one of the values from the ProgramHobby column

    OUTPUT
        1 when hobby_str is "Yes, both", "Yes, I program as a hobby" or
        "Yes, I contribute to open source projects"
        0 for any other value

    '''
    affirmative_answers = (
        "Yes, both",
        "Yes, I program as a hobby",
        "Yes, I contribute to open source projects",
    )
    return 1 if hobby_str in affirmative_answers else 0

df["ProgramHobby"].apply(program_hobby)[:8] # Sanity check: the first 8 encoded answers should each be 0 or 1

In [None]:
# Apply program_hobby() to every ProgramHobby answer and store the 0/1 result
# in a new HobbyOrNot column (this adds a column to df in place).
df["HobbyOrNot"] = df["ProgramHobby"].apply(program_hobby)

In [None]:
pro_1 = df[df["HobbyOrNot"]==1] # Developers who program as a hobby, contribute to open source projects, or both
pro_0 = df[df["HobbyOrNot"]==0] # Everyone else (HobbyOrNot == 0)

In [None]:
# Developer-type proportions within each group. plot=False, so no chart is
# drawn and the titles passed here are not displayed.
pro_1_perc = count_plot(pro_1, 'Who program as hobby and contribute to open source projects', plot=False)
pro_0_perc = count_plot(pro_0, 'Who do not do both', plot=False)

In [None]:
# Final Graph
# Put both groups' proportions side by side, aligned on the shared index.
comp_df = pro_1_perc.merge(pro_0_perc, left_index=True, right_index=True)
comp_df.columns = ['pro_1_perc', 'pro_0_perc']
# Positive difference: the type has a larger share in the HobbyOrNot == 1 group.
comp_df['Diff_Pro_Vals'] = comp_df['pro_1_perc'].sub(comp_df['pro_0_perc'])
# Last expression of the cell: render the table with in-cell bars.
comp_df.style.bar(subset=['Diff_Pro_Vals'], align='mid', color=['#d65f5f', '#5fba7d'])

<h2>Part III: Which type of Developers use which type of IDEs the most?</h2>

In [None]:
# List of IDEs to tally.
# Entries carry no surrounding whitespace: a leading blank (as in the former
# ' Sublime Text' and ' Xcode') makes a substring tally miss answers in which
# that IDE is the first or only one listed, and also ends up in the plot labels.
possible_ide = [
    'Atom',
    'Notepad++',
    'Vim',
    'PyCharm',
    'RubyMine',
    'Visual Studio',
    'Sublime Text',
    'PHPStorm',
    'Android Studio',
    'IntelliJ',
    'NetBeans',
    'Eclipse',
    'IPython / Jupyter',
    'Xcode',
]

In [None]:
# Count the rows of df for every (IDE, DeveloperType) combination; the result
# has one row per combination with the group size in a 'count' column.
IDE_dev = df.groupby(['IDE','DeveloperType']).size().reset_index(name='count')

In [None]:
# Sort by descending 'count' so the most frequent combinations come first
IDE_dev = IDE_dev.sort_values("count",ascending=False)

In [None]:
# Count and Plot the usage of IDE by different Developers
def count_plot_ide(df,title,plot,developertype):
    """
    Tally IDE mentions for one developer type and optionally plot their share.

    INPUT - df - Dataframe holding 'IDE', 'DeveloperType' and 'count' columns
          - title - string - Title of the plot
          - plot - bool - whether or not to draw the bar chart (forced to
                   False when no row matches the developer type)
          - developertype - string - Developer Type, matched as a literal
                   substring of the DeveloperType column
    OUTPUT - props_ide_df - the tallies returned by t.total_count, indexed by
             'method' and divided by their column sum (proportions)
           Prints developertype and, when plot is True, displays a bar chart
           of the proportions.
    """
    print(developertype)
    # regex=False: the developer type is plain text, not a pattern, so regex
    # metacharacters in it must not change the match. na=False keeps the mask
    # strictly boolean if DeveloperType ever contains missing values.
    matches_type = df['DeveloperType'].str.contains(developertype, regex=False, na=False)
    # Only combinations seen more than twice are kept.
    dev_df = df.loc[matches_type & (df['count']>2)]
    if dev_df.empty:
        # Nothing to draw for this developer type.
        plot = False
    # NOTE(review): value_counts() counts how many rows of dev_df share an IDE
    # string; it does not sum the 'count' column, so the row weights in df are
    # ignored here - confirm this is the intended metric.
    # The columns are named explicitly because value_counts().reset_index()
    # yields ('index', 'IDE') before pandas 2.0 but ('IDE', 'count') from 2.0
    # on, where the former rename mapping left two 'count' columns and no
    # 'method' column.
    ide_df = (
        dev_df['IDE']
        .value_counts()
        .rename_axis('method')
        .reset_index(name='count')
    )
    # Aggregate the raw answer strings per entry of the notebook-level
    # possible_ide list (total_count comes from the local TotalCount module).
    ide_df = t.total_count(ide_df,'method','count',possible_ide)
    ide_df = ide_df.set_index('method')

    # Computed once and used for both the chart and the return value.
    props_ide_df = ide_df/ide_df.sum()

    if plot:
        props_ide_df.plot(kind='bar',legend=None, stacked=True);
        plt.title(title);
        plt.show();
    return props_ide_df

# Draw one IDE chart per developer type listed in possible_vals.
# NOTE(review): each iteration rebinds props_df, the name that held the Part I
# result; after the loop it holds only the last developer type's proportions.
for val in possible_vals:
    props_df = count_plot_ide(IDE_dev,'Top IDEs used by '+val,True,val)
    