# DataFrame Basics III

## Sorting DataFrames with sort_index() and sort_values()

In [1]:
import pandas as pd

In [2]:
titanic = pd.read_csv("titanic.csv")

In [3]:
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [4]:
titanic.tail()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
886,0,2,male,27.0,0,0,13.0,S,
887,1,1,female,19.0,0,0,30.0,S,B
888,0,3,female,,1,2,23.45,S,
889,1,1,male,26.0,0,0,30.0,C,C
890,0,3,male,32.0,0,0,7.75,Q,


In [None]:
titanic.age.sort_values()

In [None]:
titanic.sort_values("age")

In [None]:
titanic.head()

In [None]:
titanic.sort_values("age", axis = 0, ascending = True, inplace = True)

In [None]:
titanic.head()

In [None]:
# Sort in place by pclass (ascending), then sex (descending), then age (ascending);
# each entry of `ascending` pairs with the column at the same position in the key list.
titanic.sort_values(["pclass", "sex", "age"], ascending = [True, False, True], inplace= True)

In [None]:
titanic.head()

In [None]:
titanic.tail()

In [None]:
titanic.sort_index(ascending=True, inplace = True)

In [None]:
titanic.head()

## Ranking DataFrames with rank()

In [None]:
import pandas as pd

In [None]:
sales = pd.Series([15, 32, 45, 21, 55, 15, 0],  index = ["Mo", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"])

In [None]:
sales

In [None]:
sales.sort_values(ascending = False)

In [None]:
# Rank so that the largest value gets rank 1. method="min" gives tied values
# (the two 15s in `sales`) the lowest rank of their group. Then order by rank.
sales.rank(ascending=False, method = "min").sort_values(ascending = True)

In [None]:
# Same ranking as the previous cell, but pct=True returns the ranks in percentile form
# instead of as absolute positions.
sales.rank(ascending=False, method = "min", pct=True).sort_values()

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.fare.rank(ascending = False)

In [None]:
titanic["fare_rank"] = titanic.fare.rank(ascending = False, method="min")

In [None]:
titanic.head()

In [None]:
titanic.sort_values("fare", ascending= False)

In [None]:
titanic.drop(columns = "fare_rank", inplace= True)

## nunique(), nlargest() and nsmallest() with DataFrames

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
titanic.tail()

In [None]:
titanic.age.unique()

In [None]:
# Number of distinct values in each row (axis=1); dropna=False counts NaN as a value of its own.
titanic.nunique(axis = 1, dropna=False)

In [None]:
titanic.nunique(dropna = False)

In [None]:
titanic.nlargest(n = 5, columns = "fare")

In [None]:
titanic.sort_values("fare", ascending = False).head(5)

In [None]:
titanic.nsmallest(n = 1, columns = "age")

In [None]:
titanic.loc[titanic.age.idxmin()]

## Summary Statistics and Accumulations

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
titanic.describe()

In [None]:
titanic.count(axis = "columns")

In [None]:
titanic.count(axis = 1)

In [None]:
# Row-wise mean over the numeric columns only.
# numeric_only=True is needed because the frame also holds text columns
# (sex, embarked, deck): pandas >= 2.0 no longer drops non-numeric columns
# silently in reductions and raises a TypeError instead.
titanic.mean(axis=1, numeric_only=True)

In [None]:
# Column totals for the numeric columns only.
# The text columns cannot be summed reliably: deck contains missing values,
# and on pandas >= 2.0 summing an object column that mixes strings and NaN
# raises a TypeError rather than being skipped.
titanic.sum(axis=0, numeric_only=True)

In [None]:
titanic.head()

In [None]:
titanic.fare.cumsum(axis = 0)

In [None]:
# Pairwise correlation matrix of the numeric columns.
# numeric_only=True (available since pandas 1.5) is required on pandas >= 2.0,
# where the default changed to False and string columns such as sex make
# corr() fail with "could not convert string to float".
titanic.corr(numeric_only=True)

In [None]:
titanic.survived.corr(titanic.pclass)

## apply(), map() and applymap()

In [None]:
import pandas as pd

In [None]:
sales = pd.read_csv("sales.csv", index_col = 0)

In [None]:
sales

In [None]:
sales.info()

In [None]:
sales.min(axis = 0)

In [None]:
sales.min(axis = 1)

In [None]:
def range(series):
    """Return the spread of *series*: its maximum minus its minimum.

    NOTE(review): the name shadows the built-in ``range`` for the rest of the
    session; it is kept unchanged here so existing callers keep working.
    """
    highest = series.max()
    lowest = series.min()
    return highest - lowest

In [None]:
sales.apply(lambda x: x.max() - x.min(), axis = 0)

In [None]:
summer = pd.read_csv("summer.csv")

In [None]:
summer.head()

In [None]:
summer.Athlete.apply(lambda x: x[0])

In [None]:
summer.Athlete.map(lambda x: x[0])

In [None]:
# Element-wise: take item [0] of every cell in the columns at positions 1 and 2
# (the first character, assuming those cells hold strings — TODO confirm against summer.csv).
# NOTE(review): DataFrame.applymap is deprecated as of pandas 2.1 in favour of DataFrame.map;
# confirm which pandas version this notebook targets before switching.
summer.iloc[:,1:3].applymap(lambda x: x[0])

In [None]:
# Apply 0.4*x - 5 to every cell one at a time; the following cell computes the same
# result with vectorised arithmetic on the whole frame.
# NOTE(review): DataFrame.applymap is deprecated as of pandas 2.1 in favour of DataFrame.map.
sales.applymap(lambda x: 0.4*x-5)

In [None]:
sales*0.4-5

## Hierarchical Indexing (MultiIndex) Intro

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic

In [None]:
titanic = titanic.iloc[:50, :]

In [None]:
titanic.set_index(["pclass", "sex"], inplace = True)

In [None]:
titanic

In [None]:
titanic.sort_index(ascending = [True, True], inplace = True )

In [None]:
titanic

In [None]:
# Returns a new frame with the two index levels (pclass, sex) swapped. The result is not
# assigned, so `titanic` itself keeps its original level order, as the next cell shows.
titanic.swaplevel()

In [None]:
titanic

In [None]:
titanic.reset_index(inplace = True)

In [None]:
titanic

## Hierarchical Indexing (MultiIndex) Part 2

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic = titanic.iloc[:50,]

In [None]:
titanic

In [None]:
titanic = titanic.set_index(["pclass", "sex"]).sort_index(ascending = True)

In [None]:
titanic

In [None]:
titanic.loc[1]

In [None]:
titanic.loc[[1,2]]

In [None]:
titanic.loc[:2]

In [None]:
titanic.loc[1, "female"]

In [None]:
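# Left commented out on purpose: with a two-level row index the row key has to be passed
# as one tuple, e.g. titanic.loc[(1, "female"), "age"]. Three bare keys are expected to
# fail (TODO confirm the exact error raised).
#titanic.loc[1, "female", "age"]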

In [None]:
titanic.loc[(1,"female")]

In [None]:
titanic.loc[(1,"female"), "age"]

In [None]:
# The row key is a tuple over the (pclass, sex) index levels: pclass in [1, 2] and
# sex == "female". The second argument selects the age and fare columns.
titanic.loc[([1,2],"female"), ["age", "fare"]]

In [None]:
titanic.loc[([1, 2],"female"), :]

In [None]:
titanic

In [None]:
# slice(x) means slice(None, x), and label slices in .loc include the end label:
# rows with pclass up to and including 1, and sex up to and including "female".
# Slicing a MultiIndex this way relies on the index being sorted (done when it was set above).
titanic.loc[(slice(1), slice("female")), :]

## String Operations Intro / Refresher

In [None]:
"Hello World"

In [None]:
type("Hello World")

In [None]:
hello = "Hello World"
hello

In [None]:
len(hello)

In [None]:
hello.lower()

In [None]:
hello.upper()

In [None]:
hello.title()

In [None]:
hello.split(" ")

In [None]:
hello.replace("Hello", "Hi")

In [None]:
import pandas as pd

In [None]:
summer = pd.read_csv("summer.csv")

In [None]:
summer.head()

In [None]:
names = summer.loc[:9, "Athlete"].copy()

In [None]:
names

In [None]:
names.dtypes

In [None]:
names[0]

In [None]:
type(names[0])

In [None]:
names.str.lower()

## String Operations in Pandas

In [None]:
import pandas as pd

In [None]:
summer = pd.read_csv("summer.csv")

In [None]:
summer.head()

In [None]:
names = summer.loc[:9, "Athlete"].copy()

In [None]:
names

In [None]:
names.str.lower()

In [None]:
names.str.title()

In [None]:
# Split each Event string on spaces at most twice (n=2), giving up to three parts;
# expand=True returns the parts as separate DataFrame columns instead of lists.
summer.Event.str.split(" ", n = 2, expand= True)

In [None]:
summer[summer.Event.str.contains("100M")]