In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict, Counter
import csv

In [2]:
# Read every survey response into a DataFrame, then preview the first five rows.
survey = pd.read_csv(
    'data/survey_results_public.csv',
)
survey.head(n=5)

Unnamed: 0,Respondent,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,...,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
0,1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
1,2,I am a student who is learning to code,No,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,3,"I am not primarily a developer, but I write co...",Yes,Never,The quality of OSS and closed source software ...,Employed full-time,Thailand,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,...,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,28.0,Man,No,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult
3,4,I am a developer by profession,No,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
4,5,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Ukraine,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,30.0,Man,No,Straight / Heterosexual,White or of European descent;Multiracial,No,Appropriate in length,Easy


In [3]:
# Report the frame's dimensions as a (rows, columns) tuple.
n_rows, n_cols = survey.shape
print((n_rows, n_cols))

(88883, 85)


In [4]:
# Fraction of rows per "Hobbyist" answer; the denominator is the full column
# length, so unanswered rows (if any) would still count towards it.
hobbyist = survey['Hobbyist']
hobbyist.value_counts() / len(hobbyist)

Yes    0.801694
No     0.198306
Name: Hobbyist, dtype: float64

In [5]:
# Same Hobbyist share as the pandas cell, computed with the csv module only.
# (csv and Counter are already imported in the notebook's first cell.)

# newline='' is what the csv module documentation asks for when opening a file
# for a reader. The file is decoded as UTF-8 because pandas reads this same
# file with its default UTF-8 codec; decoding it as Latin-1 would garble any
# non-ASCII text in the responses.
with open('data/survey_results_public.csv', 'r', encoding='utf-8', newline='') as f:
    csv_reader = csv.DictReader(f)

    # Counter returns 0 for unseen keys, so it tallies answers without needing
    # a defaultdict(int).
    counts = Counter(line['Hobbyist'] for line in csv_reader)

# Percentages are relative to the respondents who answered Yes or No.
total = counts['Yes'] + counts['No']
yes_pct = round((counts['Yes'] / total) * 100, 2)
no_pct = round((counts['No'] / total) * 100, 2)

print(yes_pct)
print(no_pct)

80.17
19.83


In [6]:
# Share of respondents (among those who answered the question) that worked
# with each language; prints the five most common.
language_counter = Counter()
languages = survey['LanguageWorkedWith']

# Unanswered rows are NaN. They are left out of the denominator so the
# percentages are relative to respondents who actually listed languages.
null_count = languages.isnull().sum()
total = languages.shape[0] - null_count

# Iterate over answered rows only, so a missing answer is never stringified
# and tallied as if 'nan' were a language. Each answer is a ';'-separated
# multi-select string.
for response in languages.dropna():
    language_counter.update(response.split(';'))

for lan, val in language_counter.most_common(5):
    print(f"{lan}: {round(val / total * 100, 2)}%")

JavaScript: 67.63%
HTML/CSS: 63.34%
SQL: 54.29%
Python: 41.62%
Java: 41.02%


Breaking down popular languages by developer type

In [7]:
# Top-5 languages within each developer type (pandas version).
# Rows without a DevType are dropped. A missing language answer is kept as the
# placeholder 'NA' so that respondent still counts towards the group's total.
# Built as one method chain: no column is assigned onto a filtered frame, and
# the original argument-less .reindex() (a no-op) is gone.
lan_dev_type = (
    survey[['DevType', 'LanguageWorkedWith']]
    .dropna(subset=['DevType'])
    .assign(LanguageWorkedWith=lambda frame: frame['LanguageWorkedWith'].fillna('NA'))
)

# Maps developer type -> {'total': respondents counted in that group,
#                         'language_counter': Counter of their languages}.
dev_type_info = {}

for raw_dev_types, raw_languages in zip(lan_dev_type['DevType'],
                                        lan_dev_type['LanguageWorkedWith']):
    # Both answers are ';'-separated multi-select strings; a respondent with
    # several developer types contributes to each of those groups.
    respondent_languages = raw_languages.split(';')
    for each_dev_type in raw_dev_types.split(';'):
        group = dev_type_info.setdefault(each_dev_type, {
            'total': 0,
            'language_counter': Counter()
        })
        group['language_counter'].update(respondent_languages)
        group['total'] += 1


for dev_type, info in dev_type_info.items():
    print(dev_type)
    for language, value in info['language_counter'].most_common(5):
        # Percentage of the group's respondents who listed this language.
        language_pct = round((value / info['total']) * 100, 2)

        print(f"\t{language}: {language_pct}%")
    print()

Developer, desktop or enterprise applications
	JavaScript: 67.84%
	HTML/CSS: 64.55%
	SQL: 63.56%
	C#: 53.69%
	Java: 44.69%

Developer, front-end
	JavaScript: 87.72%
	HTML/CSS: 83.62%
	SQL: 58.65%
	Java: 37.6%
	PHP: 35.94%

Designer
	HTML/CSS: 78.88%
	JavaScript: 78.33%
	SQL: 60.18%
	PHP: 40.23%
	Java: 39.44%

Developer, back-end
	JavaScript: 72.23%
	HTML/CSS: 65.42%
	SQL: 64.01%
	Java: 44.03%
	Python: 40.67%

Developer, full-stack
	JavaScript: 86.15%
	HTML/CSS: 78.94%
	SQL: 65.54%
	Java: 40.74%
	Bash/Shell/PowerShell: 37.91%

Academic researcher
	Python: 61.06%
	HTML/CSS: 55.87%
	JavaScript: 54.25%
	SQL: 47.55%
	Java: 42.26%

Developer, mobile
	JavaScript: 67.72%
	HTML/CSS: 62.46%
	Java: 57.21%
	SQL: 51.27%
	C#: 34.34%

Data or business analyst
	SQL: 73.88%
	HTML/CSS: 62.11%
	JavaScript: 61.33%
	Python: 51.86%
	Bash/Shell/PowerShell: 38.43%

Data scientist or machine learning specialist
	Python: 79.33%
	SQL: 58.44%
	JavaScript: 51.38%
	HTML/CSS: 50.43%
	Bash/Shell/PowerShell: 44.49%

D

In [8]:
# Top-5 languages within each developer type, using the csv module only.
# (csv and Counter are already imported in the notebook's first cell.)

# newline='' is what the csv module documentation asks for when opening a file
# for a reader. The file is decoded as UTF-8 because pandas reads this same
# file with its default UTF-8 codec; decoding it as Latin-1 would garble any
# non-ASCII text in the responses.
with open('data/survey_results_public.csv', 'r', encoding='utf-8', newline='') as f:
    csv_reader = csv.DictReader(f)

    # Maps developer type -> {'total': respondents counted in that group,
    #                         'language_counter': Counter of their languages}.
    dev_type_info = {}

    for line in csv_reader:
        # Both fields are ';'-separated multi-select strings. Unlike the pandas
        # cell, nothing is dropped here: the printed output shows an 'NA'
        # group, presumably rows whose DevType is the literal string 'NA'.
        dev_types = line['DevType'].split(';')
        # Split once per respondent; the list is the same for every dev type.
        languages = line['LanguageWorkedWith'].split(';')

        for dev_type in dev_types:
            group = dev_type_info.setdefault(dev_type, {
                'total': 0,
                'language_counter': Counter()
            })
            group['language_counter'].update(languages)
            group['total'] += 1


for dev_type, info in dev_type_info.items():
    print(dev_type)

    for language, value in info['language_counter'].most_common(5):
        # Percentage of the group's respondents who listed this language.
        language_pct = round((value / info['total']) * 100, 2)

        print(f'\t{language}: {language_pct}%')
    

NA
	HTML/CSS: 54.9%
	Python: 51.09%
	JavaScript: 50.58%
	Java: 42.71%
	C++: 35.02%
Developer, desktop or enterprise applications
	JavaScript: 67.84%
	HTML/CSS: 64.55%
	SQL: 63.56%
	C#: 53.69%
	Java: 44.69%
Developer, front-end
	JavaScript: 87.72%
	HTML/CSS: 83.62%
	SQL: 58.65%
	Java: 37.6%
	PHP: 35.94%
Designer
	HTML/CSS: 78.88%
	JavaScript: 78.33%
	SQL: 60.18%
	PHP: 40.23%
	Java: 39.44%
Developer, back-end
	JavaScript: 72.23%
	HTML/CSS: 65.42%
	SQL: 64.01%
	Java: 44.03%
	Python: 40.67%
Developer, full-stack
	JavaScript: 86.15%
	HTML/CSS: 78.94%
	SQL: 65.54%
	Java: 40.74%
	Bash/Shell/PowerShell: 37.91%
Academic researcher
	Python: 61.06%
	HTML/CSS: 55.87%
	JavaScript: 54.25%
	SQL: 47.55%
	Java: 42.26%
Developer, mobile
	JavaScript: 67.72%
	HTML/CSS: 62.46%
	Java: 57.21%
	SQL: 51.27%
	C#: 34.34%
Data or business analyst
	SQL: 73.88%
	HTML/CSS: 62.11%
	JavaScript: 61.33%
	Python: 51.86%
	Bash/Shell/PowerShell: 38.43%
Data scientist or machine learning specialist
	Python: 79.33%
	SQL: 58.