In [1]:
'''
Pandas is a data manipulation and analysis library in Python.
Pandas provides data structures: Series (1D) & DataFrames (2D, similar to a table) 
to store and manipulate large datasets efficiently. 
Pandas helps: Clean, transform, analyze data. Filter rows. Perform aggregations. 
'''

import pandas as pd

In [2]:
'''
pd.set_option() function allows to change display settings for better control 
on how data is presented. Has different options:
- max_rows
- max_columns
- max_colwidth
- precision

pd.reset_option('all') is used to reset all options to default.
'''

pd.set_option('display.max_columns', 114)
pd.set_option('display.max_rows', 87)

In [3]:
'''
pandas.read_csv(filepath): Reads a comma-separated values (csv) file into a DataFrame. 

Common Parameters:
- filepath_or_buffer: Path to the CSV file or a URL.
- delimiter or sep: Specifies the separator between values (e.g., ',' for commas).
- header: Row number(s) to use as column names. If None, defaults to the first row.
- names: List of column names if no header is present/
- index_col: Column to use as the row labels of the DataFrame. 
- Other params: usecols, dtype, parse_dates, na_values
'''

df = pd.read_csv('data/survey_results_public.csv')
schema_df = pd.read_csv('data/survey_results_schema.csv')

In [4]:
'''
df.head() - Used to view a subset of rows from a DataFrame. 
- Shows the first 5 rows by default. 
- Can specify how many first rows to show (e.g., df.head(10)).
'''

df.head()

Unnamed: 0,ResponseId,MainBranch,Age,Employment,RemoteWork,Check,CodingActivities,EdLevel,LearnCode,LearnCodeOnline,TechDoc,YearsCode,YearsCodePro,DevType,OrgSize,PurchaseInfluence,BuyNewTool,BuildvsBuy,TechEndorse,Country,Currency,CompTotal,LanguageHaveWorkedWith,LanguageWantToWorkWith,LanguageAdmired,DatabaseHaveWorkedWith,DatabaseWantToWorkWith,DatabaseAdmired,PlatformHaveWorkedWith,PlatformWantToWorkWith,PlatformAdmired,WebframeHaveWorkedWith,WebframeWantToWorkWith,WebframeAdmired,EmbeddedHaveWorkedWith,EmbeddedWantToWorkWith,EmbeddedAdmired,MiscTechHaveWorkedWith,MiscTechWantToWorkWith,MiscTechAdmired,ToolsTechHaveWorkedWith,ToolsTechWantToWorkWith,ToolsTechAdmired,NEWCollabToolsHaveWorkedWith,NEWCollabToolsWantToWorkWith,NEWCollabToolsAdmired,OpSysPersonal use,OpSysProfessional use,OfficeStackAsyncHaveWorkedWith,OfficeStackAsyncWantToWorkWith,OfficeStackAsyncAdmired,OfficeStackSyncHaveWorkedWith,OfficeStackSyncWantToWorkWith,OfficeStackSyncAdmired,AISearchDevHaveWorkedWith,AISearchDevWantToWorkWith,AISearchDevAdmired,NEWSOSites,SOVisitFreq,SOAccount,SOPartFreq,SOHow,SOComm,AISelect,AISent,AIBen,AIAcc,AIComplex,AIToolCurrently Using,AIToolInterested in Using,AIToolNot interested in Using,AINextMuch more integrated,AINextNo change,AINextMore integrated,AINextLess integrated,AINextMuch less integrated,AIThreat,AIEthics,AIChallenges,TBranch,ICorPM,WorkExp,Knowledge_1,Knowledge_2,Knowledge_3,Knowledge_4,Knowledge_5,Knowledge_6,Knowledge_7,Knowledge_8,Knowledge_9,Frequency_1,Frequency_2,Frequency_3,TimeSearching,TimeAnswering,Frustration,ProfessionalTech,ProfessionalCloud,ProfessionalQuestion,Industry,JobSatPoints_1,JobSatPoints_4,JobSatPoints_5,JobSatPoints_6,JobSatPoints_7,JobSatPoints_8,JobSatPoints_9,JobSatPoints_10,JobSatPoints_11,SurveyLength,SurveyEase,ConvertedCompYearly,JobSat
0,1,I am a developer by profession,Under 18 years old,"Employed, full-time",Remote,Apples,Hobby,Primary/elementary school,Books / Physical media,,,,,,,,,,,United States of America,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,I have never visited Stack Overflow or the Sta...,,,,,,Yes,Very favorable,Increase productivity,,,,,,,,,,,,,,No,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2,I am a developer by profession,35-44 years old,"Employed, full-time",Remote,Apples,Hobby;Contribute to open-source projects;Other...,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)",Books / Physical media;Colleague;On the job tr...,Technical documentation;Blogs;Books;Written Tu...,API document(s) and/or SDK document(s);User gu...,20.0,17.0,"Developer, full-stack",,,,,,United Kingdom of Great Britain and Northern I...,,,Bash/Shell (all shells);Go;HTML/CSS;Java;JavaS...,Bash/Shell (all shells);Go;HTML/CSS;Java;JavaS...,Bash/Shell (all shells);Go;HTML/CSS;Java;JavaS...,Dynamodb;MongoDB;PostgreSQL,PostgreSQL,PostgreSQL,Amazon Web Services (AWS);Heroku;Netlify,Amazon Web Services (AWS);Heroku;Netlify,Amazon Web Services (AWS);Heroku;Netlify,Express;Next.js;Node.js;React,Express;Htmx;Node.js;React;Remix,Express;Node.js;React,,,,,,,Docker;Homebrew;Kubernetes;npm;Vite;Webpack,Docker;Homebrew;Kubernetes;npm;Vite;Webpack,Docker;Homebrew;Kubernetes;npm;Vite;Webpack,PyCharm;Visual Studio Code;WebStorm,PyCharm;Visual Studio Code;WebStorm,PyCharm;Visual Studio Code;WebStorm,MacOS;Windows,MacOS,,,,Microsoft Teams;Slack,Slack,Slack,,,,Stack Overflow for Teams (private knowledge sh...,Multiple times per day,Yes,Multiple times per day,Quickly finding code solutions;Finding reliabl...,"Yes, definitely","No, and I don't plan to",,,,,,,,,,,,,,,,Yes,Individual contributor,17.0,Agree,Disagree,Agree,Agree,Agree,Neither agree nor disagree,Disagree,Agree,Agree,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
2,3,I am a developer by profession,45-54 years old,"Employed, full-time",Remote,Apples,Hobby;Contribute to open-source projects;Other...,"Master’s degree (M.A., M.S., M.Eng., MBA, etc.)",Books / Physical media;Colleague;On the job tr...,Technical documentation;Blogs;Books;Written Tu...,API document(s) and/or SDK document(s);User gu...,37.0,27.0,Developer Experience,,,,,,United Kingdom of Great Britain and Northern I...,,,C#,C#,C#,Firebase Realtime Database,Firebase Realtime Database,Firebase Realtime Database,Google Cloud,Google Cloud,Google Cloud,ASP.NET CORE,ASP.NET CORE,ASP.NET CORE,Rasberry Pi,Rasberry Pi,Rasberry Pi,.NET (5+) ;.NET Framework (1.0 - 4.8);.NET MAUI,.NET (5+) ;.NET Framework (1.0 - 4.8);.NET MAUI,.NET (5+) ;.NET Framework (1.0 - 4.8);.NET MAUI,MSBuild,MSBuild,MSBuild,Visual Studio,Visual Studio,Visual Studio,Windows,Windows,,,,Google Chat;Google Meet;Microsoft Teams;Zoom,Google Chat;Google Meet;Zoom,Google Chat;Google Meet;Zoom,,,,Stack Overflow;Stack Exchange;Stack Overflow B...,Multiple times per day,Yes,Multiple times per day,Quickly finding code solutions;Finding reliabl...,"Yes, definitely","No, and I don't plan to",,,,,,,,,,,,,,,,No,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Appropriate in length,Easy,,
3,4,I am learning to code,18-24 years old,"Student, full-time",,Apples,,Some college/university study without earning ...,"Other online resources (e.g., videos, blogs, f...",Stack Overflow;How-to videos;Interactive tutorial,,4.0,,"Developer, full-stack",,,,,,Canada,,,C;C++;HTML/CSS;Java;JavaScript;PHP;PowerShell;...,HTML/CSS;Java;JavaScript;PowerShell;Python;SQL...,HTML/CSS;Java;JavaScript;PowerShell;Python;SQL...,MongoDB;MySQL;PostgreSQL;SQLite,MongoDB;MySQL;PostgreSQL,MongoDB;MySQL;PostgreSQL,Amazon Web Services (AWS);Fly.io;Heroku,Amazon Web Services (AWS);Vercel,Amazon Web Services (AWS),jQuery;Next.js;Node.js;React;WordPress,jQuery;Next.js;Node.js;React,jQuery;Next.js;Node.js;React,Rasberry Pi,,,NumPy;Pandas;Ruff;TensorFlow,,,Docker;npm;Pip,Docker;Kubernetes;npm,Docker;npm,,,,,,,,,,,,,,,Stack Overflow,Daily or almost daily,No,,Quickly finding code solutions,"No, not really",Yes,Very favorable,Increase productivity;Greater efficiency;Impro...,Somewhat trust,Bad at handling complex tasks,Learning about a codebase;Project planning;Wri...,Testing code;Committing and reviewing code;Pre...,,Learning about a codebase;Project planning;Wri...,,,,,No,Circulating misinformation or disinformation;M...,Don’t trust the output or answers,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Too long,Easy,,
4,5,I am a developer by profession,18-24 years old,"Student, full-time",,Apples,,"Secondary school (e.g. American high school, G...","Other online resources (e.g., videos, blogs, f...",Technical documentation;Blogs;Written Tutorial...,API document(s) and/or SDK document(s);User gu...,9.0,,"Developer, full-stack",,,,,,Norway,,,C++;HTML/CSS;JavaScript;Lua;Python;Rust,C++;HTML/CSS;JavaScript;Lua;Python,C++;HTML/CSS;JavaScript;Lua;Python,PostgreSQL;SQLite,PostgreSQL;SQLite,PostgreSQL;SQLite,,,,,,,CMake;Cargo;Rasberry Pi,CMake;Rasberry Pi,CMake;Rasberry Pi,,,,APT;Make;npm,APT;Make,APT;Make,Vim,Vim,Vim,Other (please specify):,,GitHub Discussions;Markdown File;Obsidian;Stac...,GitHub Discussions;Markdown File;Obsidian,GitHub Discussions;Markdown File;Obsidian,Discord;Whatsapp,Discord;Whatsapp,Discord;Whatsapp,,,,Stack Overflow for Teams (private knowledge sh...,Multiple times per day,Yes,Multiple times per day,Quickly finding code solutions;Engage with com...,"Yes, definitely","No, and I don't plan to",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Too short,Easy,,


In [5]:
'''
df.tail() - Shows the last 5 rows by default.
- Can specify the number of last rows to show (e.g., df.tail(10)).
'''
df.tail()

Unnamed: 0,ResponseId,MainBranch,Age,Employment,RemoteWork,Check,CodingActivities,EdLevel,LearnCode,LearnCodeOnline,TechDoc,YearsCode,YearsCodePro,DevType,OrgSize,PurchaseInfluence,BuyNewTool,BuildvsBuy,TechEndorse,Country,Currency,CompTotal,LanguageHaveWorkedWith,LanguageWantToWorkWith,LanguageAdmired,DatabaseHaveWorkedWith,DatabaseWantToWorkWith,DatabaseAdmired,PlatformHaveWorkedWith,PlatformWantToWorkWith,PlatformAdmired,WebframeHaveWorkedWith,WebframeWantToWorkWith,WebframeAdmired,EmbeddedHaveWorkedWith,EmbeddedWantToWorkWith,EmbeddedAdmired,MiscTechHaveWorkedWith,MiscTechWantToWorkWith,MiscTechAdmired,ToolsTechHaveWorkedWith,ToolsTechWantToWorkWith,ToolsTechAdmired,NEWCollabToolsHaveWorkedWith,NEWCollabToolsWantToWorkWith,NEWCollabToolsAdmired,OpSysPersonal use,OpSysProfessional use,OfficeStackAsyncHaveWorkedWith,OfficeStackAsyncWantToWorkWith,OfficeStackAsyncAdmired,OfficeStackSyncHaveWorkedWith,OfficeStackSyncWantToWorkWith,OfficeStackSyncAdmired,AISearchDevHaveWorkedWith,AISearchDevWantToWorkWith,AISearchDevAdmired,NEWSOSites,SOVisitFreq,SOAccount,SOPartFreq,SOHow,SOComm,AISelect,AISent,AIBen,AIAcc,AIComplex,AIToolCurrently Using,AIToolInterested in Using,AIToolNot interested in Using,AINextMuch more integrated,AINextNo change,AINextMore integrated,AINextLess integrated,AINextMuch less integrated,AIThreat,AIEthics,AIChallenges,TBranch,ICorPM,WorkExp,Knowledge_1,Knowledge_2,Knowledge_3,Knowledge_4,Knowledge_5,Knowledge_6,Knowledge_7,Knowledge_8,Knowledge_9,Frequency_1,Frequency_2,Frequency_3,TimeSearching,TimeAnswering,Frustration,ProfessionalTech,ProfessionalCloud,ProfessionalQuestion,Industry,JobSatPoints_1,JobSatPoints_4,JobSatPoints_5,JobSatPoints_6,JobSatPoints_7,JobSatPoints_8,JobSatPoints_9,JobSatPoints_10,JobSatPoints_11,SurveyLength,SurveyEase,ConvertedCompYearly,JobSat
65432,65433,I am a developer by profession,18-24 years old,"Employed, full-time",Remote,Apples,Hobby;School or academic work,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)","On the job training;School (i.e., University, ...",,,5.0,3.0,Blockchain,2 to 9 employees,I have some influence,Ask developers I know/work with;Ask a generati...,Is ready-to-go but also customizable for growt...,APIs;Integrated developer community;Quality su...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Yes,Favorable,Increase productivity;Greater efficiency;Speed...,Neither trust nor distrust,"Good, but not great at handling complex tasks",Learning about a codebase;Project planning;Wri...,Debugging and getting help;Testing code;Deploy...,Committing and reviewing code,Writing code;Search for answers;Generating con...,,Learning about a codebase;Project planning;Doc...,,,No,Circulating misinformation or disinformation,"AI tools lack context of codebase, internal a...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
65433,65434,I am a developer by profession,25-34 years old,"Employed, full-time",Remote,Apples,Hobby;Contribute to open-source projects,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Stack Overflow;Stack Exchange,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
65434,65435,I am a developer by profession,25-34 years old,"Employed, full-time",In-person,Apples,Hobby,"Bachelor’s degree (B.A., B.S., B.Eng., etc.)","Other online resources (e.g., videos, blogs, f...",Technical documentation;Stack Overflow;Social ...,API document(s) and/or SDK document(s);AI-powe...,9.0,5.0,"Developer, mobile","1,000 to 4,999 employees",I have little or no influence,Ask developers I know/work with;Ask a generati...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
65435,65436,I am a developer by profession,18-24 years old,"Employed, full-time","Hybrid (some remote, some in-person)",Apples,Hobby;Contribute to open-source projects;Profe...,"Secondary school (e.g. American high school, G...",On the job training;Other online resources (e....,Technical documentation;Blogs;Written Tutorial...,API document(s) and/or SDK document(s);User gu...,5.0,2.0,"Developer, back-end",20 to 99 employees,I have some influence,Start a free trial;Ask developers I know/work ...,Is ready-to-go but also customizable for growt...,APIs;Customization;Connected to an open source...,Germany,EUR European Euro,,C;C++;Go;Lua;Objective-C;Python;Rust;SQL,Rust;SQL,Rust;SQL,PostgreSQL;SQLite,PostgreSQL;SQLite,PostgreSQL;SQLite,,,,Angular;Node.js;React,,,Arduino;CMake;Cargo;GNU GCC;Rasberry Pi,Cargo;GNU GCC;Rasberry Pi,Cargo;GNU GCC;Rasberry Pi,GTK;OpenGL,,,Ansible;APT;Docker;Make;npm;Pip;Podman,Ansible;APT;Docker;Make;npm;Pip;Podman,Ansible;APT;Docker;Make;npm;Pip;Podman,Vim;Visual Studio Code,Vim;Visual Studio Code,Vim;Visual Studio Code,Android;Other Linux-based;Ubuntu;Windows,Fedora;Other Linux-based;Ubuntu,Confluence;Jira;Markdown File;Obsidian;Trello,Confluence;Jira;Markdown File;Trello,Confluence;Jira;Markdown File;Trello,Discord;Jitsi;Microsoft Teams;Whatsapp,Discord;Jitsi;Whatsapp,Discord;Jitsi;Whatsapp,,,,Stack Overflow;Stack Exchange,A few times per month or weekly,No,,Quickly finding code solutions,"No, not really","No, and I don't plan to",,,,,,,,,,,,,,,,Yes,Individual contributor,5.0,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,
65436,65437,I code primarily as a hobby,18-24 years old,"Student, full-time",,Apples,,,,,,,,,,,,,,,,,C;HTML/CSS;Java;JavaScript;PHP;Python;TypeScript,JavaScript;Python;TypeScript,JavaScript;Python;TypeScript,MongoDB;MySQL;Oracle;PostgreSQL,MongoDB;MySQL;PostgreSQL,MongoDB;MySQL;PostgreSQL,Amazon Web Services (AWS);Cloudflare;Google Cl...,Cloudflare;Google Cloud;Netlify;Vercel,Cloudflare;Google Cloud;Netlify;Vercel,Django;Express;jQuery;Next.js;Node.js;React,Django;Express;Next.js;Node.js;React,Django;Express;Next.js;Node.js;React,,,,,,,npm;Pip;Vite,npm;Pip;Vite,npm;Pip;Vite,Visual Studio Code,Visual Studio Code,Visual Studio Code,Windows,MacOS;Windows,,,,,,,,,,Stack Overflow,Daily or almost daily,Yes,Less than once per month or monthly,Quickly finding code solutions,"Yes, somewhat",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [6]:
'''
df.shape: Returns a tuple representing the dimensions of a DataFrame,
in the format (number of rows, number of columns). 
'''

df.shape

(65437, 114)

In [7]:
'''
df.info(): Provides a summary of the DataFrame, including the index type,
column names, non-null counts, and data type of each column.
'''

df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 65437 entries, 0 to 65436
Data columns (total 114 columns):
 #    Column                          Dtype  
---   ------                          -----  
 0    ResponseId                      int64  
 1    MainBranch                      object 
 2    Age                             object 
 3    Employment                      object 
 4    RemoteWork                      object 
 5    Check                           object 
 6    CodingActivities                object 
 7    EdLevel                         object 
 8    LearnCode                       object 
 9    LearnCodeOnline                 object 
 10   TechDoc                         object 
 11   YearsCode                       object 
 12   YearsCodePro                    object 
 13   DevType                         object 
 14   OrgSize                         object 
 15   PurchaseInfluence               object 
 16   BuyNewTool                      object 
 17   BuildvsBuy

In [8]:
# Print out the schema dataframe
schema_df.head()

Unnamed: 0,qid,qname,question,force_resp,type,selector
0,QID2,MainBranch,Which of the following options best describes ...,True,MC,SAVR
1,QID127,Age,What is your age?*,True,MC,SAVR
2,QID296,Employment,Which of the following best describes your cur...,True,MC,MAVR
3,QID308,RemoteWork,Which best describes your current work situation?,False,MC,SAVR
4,QID341,Check,Just checking to make sure you are paying atte...,True,MC,SAVR


In [11]:
# Last 10 items of data
schema_df.tail(10)

Unnamed: 0,qid,qname,question,force_resp,type,selector
77,QID290,Frequency_3,Encountering knowledge silos (where one indivi...,,MC,MAVR
78,QID337,JobSatPoints_1,Driving strategy for my team,,MC,MAVR
79,QID337,JobSatPoints_4,Contributing to open source,,MC,MAVR
80,QID337,JobSatPoints_5,Securing data and environments,,MC,MAVR
81,QID337,JobSatPoints_6,Improving quality of code and developer enviro...,,MC,MAVR
82,QID337,JobSatPoints_7,"Learning and using new technology, including p...",,MC,MAVR
83,QID337,JobSatPoints_8,"Designing and building environments, databases...",,MC,MAVR
84,QID337,JobSatPoints_9,"Being a power user of a tool, developer langua...",,MC,MAVR
85,QID337,JobSatPoints_10,Working with new and/or top-quality hardware,,MC,MAVR
86,QID337,JobSatPoints_11,A well-staffed/sourced internal network that m...,,MC,MAVR
