In [1]:
import numpy as np
import pandas as pd

In [2]:
# Small hand-made dataset used to demonstrate the sorting methods below.
people = dict(
    first=["Corey", "Jane", "John", "Adam"],
    last=["Schafer", "Doe", "Doe", "Doe"],
    email=[
        "CoreyMSchafer@gmail.com",
        "JaneDoe@gmail.com",
        "JohnDoe@gmail.com",
        "A@gmail.com",
    ],
)
# Each key becomes a column; rows get the default integer index 0..3.
df = pd.DataFrame(data=people)
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com
3,Adam,Doe,A@gmail.com


In [3]:
# Sort the rows by last name; ascending order is the default, spelled out here.
# The result is a new DataFrame that is only displayed, not stored.
df.sort_values('last', ascending=True)

Unnamed: 0,first,last,email
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com
3,Adam,Doe,A@gmail.com
0,Corey,Schafer,CoreyMSchafer@gmail.com


To sort the data in descending order, we pass `ascending=False`:

In [4]:
# Same sort key as before, but reversed: last names from Z to A.
df.sort_values('last', ascending=False)

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com
3,Adam,Doe,A@gmail.com


To sort by last name and, where the last names are the same, by first name (both in descending order), we pass a list of columns:

In [5]:
# Sort on two keys: last name first, then first name to break ties.
# A single descending flag per column is written out explicitly.
df.sort_values(
    by=['last', 'first'],
    ascending=[False, False],
)

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
2,John,Doe,JohnDoe@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
3,Adam,Doe,A@gmail.com


In [6]:
# The sort_values calls above only displayed their results; df itself
# is still in its original row order.
df # the original DataFrame is not changed.

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com
3,Adam,Doe,A@gmail.com


To sort by last name in descending order and, where the last names are the same, by first name in ascending order, we pass a list of booleans to `ascending` (one per column):

In [7]:
# Mixed directions: the i-th entry of `ascending` applies to the i-th column
# in `by` -- last name descending, first name ascending within ties.
df.sort_values(
    by=['last', 'first'],
    ascending=[False, True],
)

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
3,Adam,Doe,A@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


In [8]:
# Display df again: the previous cell's sorted result was not stored,
# so the rows are still in their original order.
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com
3,Adam,Doe,A@gmail.com


In [9]:
# Make the sort permanent by rebinding df to the sorted result.
# This leaves df in the same state as passing inplace=True, but reassignment is
# the preferred style: inplace=True offers no performance benefit and
# prevents method chaining.
df = df.sort_values(by=['last', 'first'], ascending=[False, True])

In [10]:
# df now holds the sorted rows; the index labels show the new order.
df

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
3,Adam,Doe,A@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com


In [11]:
# To order the rows by their index labels instead of by a column, use
# sort_index. Ascending order is the default, spelled out here; the sorted
# frame is displayed but not stored.
df.sort_index(ascending=True)

Unnamed: 0,first,last,email
0,Corey,Schafer,CoreyMSchafer@gmail.com
1,Jane,Doe,JaneDoe@gmail.com
2,John,Doe,JohnDoe@gmail.com
3,Adam,Doe,A@gmail.com


In [13]:
# A single column is a Series, which has its own sort_values method;
# no `by` argument is needed because there is only one set of values.
df['last'].sort_values(ascending=True)

3        Doe
1        Doe
2        Doe
0    Schafer
Name: last, dtype: object

In [14]:
# Load the survey responses and the file describing the survey's schema.
# NOTE: this rebinds `df`; the small people DataFrame from the earlier
# cells is no longer reachable under that name after this cell runs.
df = pd.read_csv(filepath_or_buffer='data/survey_results_public.csv')
schema_df = pd.read_csv(filepath_or_buffer='data/survey_results_schema.csv')

In [15]:
# Preview the first five rows of the survey data (5 is head's default).
df.head(n=5)

Unnamed: 0,Respondent,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,...,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
0,1,I am a student who is learning to code,Yes,Never,The quality of OSS and closed source software ...,"Not employed, and not looking for work",United Kingdom,No,Primary/elementary school,,...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,14.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
1,2,I am a student who is learning to code,No,Less than once per year,The quality of OSS and closed source software ...,"Not employed, but looking for work",Bosnia and Herzegovina,"Yes, full-time","Secondary school (e.g. American high school, G...",,...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,19.0,Man,No,Straight / Heterosexual,,No,Appropriate in length,Neither easy nor difficult
2,3,"I am not primarily a developer, but I write co...",Yes,Never,The quality of OSS and closed source software ...,Employed full-time,Thailand,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)",Web development or web design,...,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,28.0,Man,No,Straight / Heterosexual,,Yes,Appropriate in length,Neither easy nor difficult
3,4,I am a developer by profession,No,Never,The quality of OSS and closed source software ...,Employed full-time,United States,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,Tech articles written by other developers;Indu...,22.0,Man,No,Straight / Heterosexual,White or of European descent,No,Appropriate in length,Easy
4,5,I am a developer by profession,Yes,Once a month or more often,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Ukraine,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,Just as welcome now as I felt last year,Tech meetups or events in your area;Courses on...,30.0,Man,No,Straight / Heterosexual,White or of European descent;Multiracial,No,Appropriate in length,Easy


In [17]:
# Sort only the Country column alphabetically and look at the first 50 values.
# df itself is not modified by this.
(
    df['Country']
    .sort_values(ascending=True)
    .head(n=50)
)

39018    Afghanistan
62723    Afghanistan
85185    Afghanistan
50437    Afghanistan
88340    Afghanistan
88735    Afghanistan
6391     Afghanistan
39754    Afghanistan
88181    Afghanistan
48122    Afghanistan
10697    Afghanistan
62124    Afghanistan
49377    Afghanistan
32971    Afghanistan
60559    Afghanistan
85827    Afghanistan
88033    Afghanistan
28470    Afghanistan
719      Afghanistan
26189    Afghanistan
86394    Afghanistan
85293    Afghanistan
7056     Afghanistan
7323     Afghanistan
86551    Afghanistan
58390    Afghanistan
73920    Afghanistan
28877    Afghanistan
51525    Afghanistan
45562    Afghanistan
46807    Afghanistan
44123    Afghanistan
22327    Afghanistan
8112     Afghanistan
80423    Afghanistan
59184    Afghanistan
60185    Afghanistan
37571    Afghanistan
29560    Afghanistan
42832    Afghanistan
12253    Afghanistan
58082    Afghanistan
61770    Afghanistan
50172    Afghanistan
78326        Albania
57057        Albania
49431        Albania
41344        

In [19]:
# Sort the whole survey alphabetically by country and keep the result.
# Rebinding df leaves it in the same state as inplace=True, without the
# drawbacks of in-place mutation (no performance benefit, no method chaining).
df = df.sort_values(by='Country')
df.head()

Unnamed: 0,Respondent,MainBranch,Hobbyist,OpenSourcer,OpenSource,Employment,Country,Student,EdLevel,UndergradMajor,...,WelcomeChange,SONewContent,Age,Gender,Trans,Sexuality,Ethnicity,Dependents,SurveyLength,SurveyEase
39018,39258,I am a developer by profession,Yes,Less than once per year,"OSS is, on average, of LOWER quality than prop...",Employed full-time,Afghanistan,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Computer science, computer engineering, or sof...",...,,Tech articles written by other developers;Indu...,26.0,Man,No,Straight / Heterosexual,East Asian;South Asian,Yes,Too long,Easy
62723,63129,I am a developer by profession,Yes,Never,"OSS is, on average, of HIGHER quality than pro...",Employed full-time,Afghanistan,"Yes, full-time",I never completed any formal education,,...,,Tech articles written by other developers,,,,,,Yes,Too short,Easy
85185,85715,I am a developer by profession,No,Less than once per year,The quality of OSS and closed source software ...,Employed full-time,Afghanistan,No,"Bachelor’s degree (BA, BS, B.Eng., etc.)","Information systems, information technology, o...",...,Somewhat less welcome now than last year,Tech articles written by other developers;Indu...,28.0,Man,No,,,Yes,Too long,Neither easy nor difficult
50437,50767,I am a developer by profession,No,Less than once per year,,"Independent contractor, freelancer, or self-em...",Afghanistan,"Yes, part-time",Associate degree,"A health science (ex. nursing, pharmacy, radio...",...,,,,,,,,,,
88340,2782,,Yes,Less than once per year,"OSS is, on average, of HIGHER quality than pro...",,Afghanistan,No,,I never declared a major,...,Just as welcome now as I felt last year,,60.0,Man,,,,,Appropriate in length,Easy


In [21]:
# df is now stored sorted by country, so the first 50 entries of the column
# can be read directly without sorting again.
df["Country"].head(n=50)

39018    Afghanistan
62723    Afghanistan
85185    Afghanistan
50437    Afghanistan
88340    Afghanistan
88735    Afghanistan
6391     Afghanistan
39754    Afghanistan
88181    Afghanistan
48122    Afghanistan
10697    Afghanistan
62124    Afghanistan
49377    Afghanistan
32971    Afghanistan
60559    Afghanistan
85827    Afghanistan
88033    Afghanistan
28470    Afghanistan
719      Afghanistan
26189    Afghanistan
86394    Afghanistan
85293    Afghanistan
7056     Afghanistan
7323     Afghanistan
86551    Afghanistan
58390    Afghanistan
73920    Afghanistan
28877    Afghanistan
51525    Afghanistan
45562    Afghanistan
46807    Afghanistan
44123    Afghanistan
22327    Afghanistan
8112     Afghanistan
80423    Afghanistan
59184    Afghanistan
60185    Afghanistan
37571    Afghanistan
29560    Afghanistan
42832    Afghanistan
12253    Afghanistan
58082    Afghanistan
61770    Afghanistan
50172    Afghanistan
78326        Albania
57057        Albania
49431        Albania
41344        

In [22]:
# Look at country next to compensation for the first 50 rows.
# At this point the rows are ordered by country only.
df[["Country", "ConvertedComp"]].head(n=50)

Unnamed: 0,Country,ConvertedComp
39018,Afghanistan,19152.0
62723,Afghanistan,1000000.0
85185,Afghanistan,
50437,Afghanistan,
88340,Afghanistan,
88735,Afghanistan,
6391,Afghanistan,
39754,Afghanistan,
88181,Afghanistan,
48122,Afghanistan,4464.0


In [23]:
# Sort by country A-Z and, within each country, by compensation from highest
# to lowest. The result is stored by rebinding df rather than with
# inplace=True, which offers no performance benefit and prevents chaining.
df = df.sort_values(by=["Country", "ConvertedComp"], ascending=[True, False])

In [25]:
# Same two columns again, now that df is sorted by country and then by
# compensation in descending order.
df[["Country", "ConvertedComp"]].head(n=50)

Unnamed: 0,Country,ConvertedComp
62723,Afghanistan,1000000.0
50172,Afghanistan,153216.0
39018,Afghanistan,19152.0
58082,Afghanistan,17556.0
7056,Afghanistan,14364.0
22327,Afghanistan,7980.0
48122,Afghanistan,4464.0
10697,Afghanistan,3996.0
8112,Afghanistan,1596.0
29560,Afghanistan,1116.0


In [28]:
# The ten highest compensation values across all countries.
(
    df["ConvertedComp"]
    .sort_values(ascending=False)
    .head(n=10)
)

69083    2000000.0
54294    2000000.0
59493    2000000.0
59488    2000000.0
47968    2000000.0
59400    2000000.0
47712    2000000.0
59634    2000000.0
59525    2000000.0
59173    2000000.0
Name: ConvertedComp, dtype: float64