# Getting started with pandas DataFrames

In [1]:
import pandas as pd

In [2]:
# Load the sample CSV into a DataFrame; the path is relative, so the file
# must sit in the notebook's working directory.
df = pd.read_csv("people-example.csv")

In [3]:
# Display the frame that was just loaded from the CSV file.
df

Unnamed: 0,First Name,Last Name,Country,age
0,Bob,Smith,United States,24
1,Alice,Williams,Canada,23
2,Malcolm,Jone,England,22
3,Felix,Brown,USA,23
4,Alex,Cooper,Poland,23
5,Tod,Campbell,United States,22
6,Derek,Ward,Switzerland,25


## Loading data from a Python dictionary

In [None]:
# Column-oriented input: each key is a column label and each list holds that
# column's values, one entry per row.
data = {
    'First Name': ['Bob', 'Alice', 'Malcolm', 'Felix', 'Alex', 'Tod', 'Derek'],
    'Last Name': ['Smith', 'Williams', 'Jone', 'Brown', 'Cooper', 'Campbell', 'Ward'],
    'Country': [
        'United States', 'Canada', 'England', 'USA',
        'Poland', 'United States', 'Switzerland',
    ],
    'Age': [24, 23, 22, 23, 23, 22, 25],
}

In [6]:
 df = pd.DataFrame(data)


In [7]:
# Display the frame built from the dictionary.
df

Unnamed: 0,Age,Country,First Name,Last Name
0,24,United States,Bob,Smith
1,23,Canada,Alice,Williams
2,22,England,Malcolm,Jone
3,23,USA,Felix,Brown
4,23,Poland,Alex,Cooper
5,22,United States,Tod,Campbell
6,25,Switzerland,Derek,Ward


In [9]:
# shape is a (number of rows, number of columns) tuple.
df.shape

(7, 4)

In [10]:
# Unpack the shape tuple into separate row and column counts.
rows, columns = df.shape

In [11]:
# Display the number of rows.
rows

7

In [12]:
# With no argument, head() shows the first five rows.
df.head()

Unnamed: 0,Age,Country,First Name,Last Name
0,24,United States,Bob,Smith
1,23,Canada,Alice,Williams
2,22,England,Malcolm,Jone
3,23,USA,Felix,Brown
4,23,Poland,Alex,Cooper


In [13]:
# Pass a count to head() to choose how many leading rows are shown.
df.head(1)

Unnamed: 0,Age,Country,First Name,Last Name
0,24,United States,Bob,Smith


In [14]:
# tail(n) shows the last n rows; the rows keep their original index labels.
df.tail(5)

Unnamed: 0,Age,Country,First Name,Last Name
2,22,England,Malcolm,Jone
3,23,USA,Felix,Brown
4,23,Poland,Alex,Cooper
5,22,United States,Tod,Campbell
6,25,Switzerland,Derek,Ward


In [15]:
# Show only the final row.
df.tail(1)

Unnamed: 0,Age,Country,First Name,Last Name
6,25,Switzerland,Derek,Ward


## Slicing a DataFrame

In [16]:
# An integer slice selects rows by position; the end of the slice is
# exclusive, so this returns rows 2, 3 and 4.
df[2:5]

Unnamed: 0,Age,Country,First Name,Last Name
2,22,England,Malcolm,Jone
3,23,USA,Felix,Brown
4,23,Poland,Alex,Cooper


In [17]:
# A full slice selects every row.
df[:]

Unnamed: 0,Age,Country,First Name,Last Name
0,24,United States,Bob,Smith
1,23,Canada,Alice,Williams
2,22,England,Malcolm,Jone
3,23,USA,Felix,Brown
4,23,Poland,Alex,Cooper
5,22,United States,Tod,Campbell
6,25,Switzerland,Derek,Ward


## Accessing columns

In [20]:
# List the column labels.
df.columns

Index([u'Age', u'Country', u'First Name', u'Last Name'], dtype='object')

In [21]:
# Attribute access returns the column as a Series. It cannot be used for
# labels that are not valid Python identifiers, such as 'First Name'.
df.Age

0    24
1    23
2    22
3    23
4    23
5    22
6    25
Name: Age, dtype: int64

In [22]:
# Attribute access again, this time for a column of strings (dtype object).
df.Country

0    United States
1           Canada
2          England
3              USA
4           Poland
5    United States
6      Switzerland
Name: Country, dtype: object

In [23]:
# Bracket access by label returns the same Series as df.Age.
df['Age']

0    24
1    23
2    22
3    23
4    23
5    22
6    25
Name: Age, dtype: int64

In [24]:
# Bracket access by label returns the same Series as df.Country.
df['Country']

0    United States
1           Canada
2          England
3              USA
4           Poland
5    United States
6      Switzerland
Name: Country, dtype: object

In [25]:
# A single column selected by label is a pandas Series.
type(df["Country"])

pandas.core.series.Series

In [26]:
# A list of labels returns a DataFrame containing just those columns, in the
# order they are listed.
df[['Country', 'Age']]

Unnamed: 0,Country,Age
0,United States,24
1,Canada,23
2,England,22
3,USA,23
4,Poland,23
5,United States,22
6,Switzerland,25


## Operations

In [27]:
# Largest value in the Age column.
df['Age'].max()

25

In [28]:
# Smallest value in the Age column.
df['Age'].min()

22

In [29]:
# Arithmetic mean of the Age column.
df['Age'].mean()

23.142857142857142

In [30]:
# Standard deviation of the Age column; the value shown is the sample
# standard deviation (n - 1 in the denominator).
df['Age'].std()

1.0690449676496976

In [31]:
# Summary statistics; only the numeric column (Age) appears in the result.
df.describe()

Unnamed: 0,Age
count,7.0
mean,23.142857
std,1.069045
min,22.0
25%,22.5
50%,23.0
75%,23.5
max,25.0


## Filter or Query

In [32]:
# df.Age > 22 is a boolean Series; indexing with it keeps only the rows where
# it is True.
df[df.Age > 22]

Unnamed: 0,Age,Country,First Name,Last Name
0,24,United States,Bob,Smith
1,23,Canada,Alice,Williams
3,23,USA,Felix,Brown
4,23,Poland,Alex,Cooper
6,25,Switzerland,Derek,Ward


In [33]:
# Keep the row(s) whose Age equals the column maximum.
df[df.Age == df["Age"].max()]

Unnamed: 0,Age,Country,First Name,Last Name
6,25,Switzerland,Derek,Ward


In [36]:
# Select the rows with the maximum Age and only the Age and Country columns.
# A single .loc[rows, columns] lookup replaces the chained df[cols][mask]
# form, which builds an intermediate frame before filtering it.
df.loc[df["Age"] == df["Age"].max(), ["Age", "Country"]]

Unnamed: 0,Age,Country
6,25,Switzerland


In [40]:
# String columns concatenate element-wise; assigning to a new label adds the
# result to df as a new column.
df["Full Name"] = df["First Name"] + ' ' + df["Last Name"]

In [41]:
# Display the frame, which now includes the Full Name column.
df

Unnamed: 0,Age,Country,First Name,Last Name,Full Name
0,24,United States,Bob,Smith,Bob Smith
1,23,Canada,Alice,Williams,Alice Williams
2,22,England,Malcolm,Jone,Malcolm Jone
3,23,USA,Felix,Brown,Felix Brown
4,23,Poland,Alex,Cooper,Alex Cooper
5,22,United States,Tod,Campbell,Tod Campbell
6,25,Switzerland,Derek,Ward,Derek Ward


## Index

In [37]:
# Display the row index of the frame.
df.index

RangeIndex(start=0, stop=7, step=1)

## Transformation

In [42]:
def transform_country(country):
    """Normalise a country value.

    Returns "United States" when ``country`` is exactly "USA"; any other
    value is returned unchanged.
    """
    return "United States" if country == "USA" else country

In [45]:
# Apply the function to every element of every column and show the result.
# Mapping column by column with Series.map avoids DataFrame.applymap, which
# pandas 2.1 deprecated. A new frame is returned; df itself is not modified.
df.apply(lambda column: column.map(transform_country))

Unnamed: 0,Age,Country,First Name,Last Name,Full Name
0,24,United States,Bob,Smith,Bob Smith
1,23,Canada,Alice,Williams,Alice Williams
2,22,England,Malcolm,Jone,Malcolm Jone
3,23,United States,Felix,Brown,Felix Brown
4,23,Poland,Alex,Cooper,Alex Cooper
5,22,United States,Tod,Campbell,Tod Campbell
6,25,Switzerland,Derek,Ward,Derek Ward


In [46]:
# df is unchanged (row 3 still reads "USA"): the previous cell only displayed
# a transformed copy and did not assign it.
df

Unnamed: 0,Age,Country,First Name,Last Name,Full Name
0,24,United States,Bob,Smith,Bob Smith
1,23,Canada,Alice,Williams,Alice Williams
2,22,England,Malcolm,Jone,Malcolm Jone
3,23,USA,Felix,Brown,Felix Brown
4,23,Poland,Alex,Cooper,Alex Cooper
5,22,United States,Tod,Campbell,Tod Campbell
6,25,Switzerland,Derek,Ward,Derek Ward


In [50]:
# Apply the function to every element of every column and keep the result by
# rebinding df. Mapping column by column with Series.map avoids
# DataFrame.applymap, which pandas 2.1 deprecated.
df = df.apply(lambda column: column.map(transform_country))

In [51]:
# Display the frame after the transformed result was assigned back to df.
df

Unnamed: 0,Age,Country,First Name,Last Name,Full Name
0,24,United States,Bob,Smith,Bob Smith
1,23,Canada,Alice,Williams,Alice Williams
2,22,England,Malcolm,Jone,Malcolm Jone
3,23,United States,Felix,Brown,Felix Brown
4,23,Poland,Alex,Cooper,Alex Cooper
5,22,United States,Tod,Campbell,Tod Campbell
6,25,Switzerland,Derek,Ward,Derek Ward


In [53]:
# Series.apply calls the function on each value of one column and returns a
# new Series; df is not modified by this cell.
df["Country"].apply(transform_country)

0    United States
1           Canada
2          England
3    United States
4           Poland
5    United States
6      Switzerland
Name: Country, dtype: object

In [54]:
# Assign the transformed Series back so the change is stored in the Country
# column; the other columns are not touched.
df["Country"] = df["Country"].apply(transform_country)

In [55]:
# Display the frame after the Country column was reassigned.
df

Unnamed: 0,Age,Country,First Name,Last Name,Full Name
0,24,United States,Bob,Smith,Bob Smith
1,23,Canada,Alice,Williams,Alice Williams
2,22,England,Malcolm,Jone,Malcolm Jone
3,23,United States,Felix,Brown,Felix Brown
4,23,Poland,Alex,Cooper,Alex Cooper
5,22,United States,Tod,Campbell,Tod Campbell
6,25,Switzerland,Derek,Ward,Derek Ward
