# **Pandas Introduction**

## 1. Install Pandas

In [2]:
#!pip install pandas

import pandas as pd

pd.__version__

'2.2.2'

## 2. Data Structures

In [4]:
s = pd.Series([3, -5, 7, 4])
s

0    3
1   -5
2    7
3    4
dtype: int64

In [5]:
s = pd.Series([3, -5, 7, 4], index=['a', 'b', 'c', 'd'])
s

a    3
b   -5
c    7
d    4
dtype: int64

In [4]:
data = {
    'Name': ['Anh', 'Thai', 'Hoa'],
    'Country': ['Brazil', 'Vietnam', 'Vietnam'],
    'Grade': [7.0, 8.0, 9.0]
}
df = pd.DataFrame(data, columns=['Name', 'Country', 'Grade'])
df

Unnamed: 0,Name,Country,Grade
0,Anh,Brazil,7.0
1,Thai,Vietnam,8.0
2,Hoa,Vietnam,9.0


In [10]:
data = {
    'Name': ['Anh', 'Thai', 'Hoa'],
    'Country': ['Brazil', 'Vietnam', 'Vietnam']
}
df = pd.DataFrame(data, columns=['Name', 'Country'])
df

Unnamed: 0,Name,Country
0,Anh,Brazil
1,Thai,Vietnam
2,Hoa,Vietnam


## 3. Selection

In [11]:
s['b']

-5

In [12]:
df[1:]

Unnamed: 0,Name,Country
1,Thai,Vietnam
2,Hoa,Vietnam


In [14]:
df.iloc[[0],[0]]

Unnamed: 0,Name
0,Anh


In [13]:
df.iloc[0,0]

'Anh'

In [21]:
df.loc[0,['Country']]

Country    Brazil
Name: 0, dtype: object

In [25]:
df.loc[0,['Name', 'Country']]

Name          Anh
Country    Brazil
Name: 0, dtype: object

In [5]:
df.head(n=2)

Unnamed: 0,Name,Country,Grade
0,Anh,Brazil,7.0
1,Thai,Vietnam,8.0


In [6]:
df.tail(n=2)

Unnamed: 0,Name,Country,Grade
1,Thai,Vietnam,8.0
2,Hoa,Vietnam,9.0


In [7]:
df.sample(frac=0.5)

Unnamed: 0,Name,Country,Grade
0,Anh,Brazil,7.0
2,Hoa,Vietnam,9.0


In [8]:
df.sample(n=2)

Unnamed: 0,Name,Country,Grade
1,Thai,Vietnam,8.0
2,Hoa,Vietnam,9.0


In [9]:
df.nlargest(n=2, columns='Grade')

Unnamed: 0,Name,Country,Grade
2,Hoa,Vietnam,9.0
1,Thai,Vietnam,8.0


In [10]:
df.nsmallest(n=2, columns='Grade')

Unnamed: 0,Name,Country,Grade
0,Anh,Brazil,7.0
1,Thai,Vietnam,8.0


In [11]:
df[['Name']]

Unnamed: 0,Name
0,Anh
1,Thai
2,Hoa


In [12]:
df.Grade

0    7.0
1    8.0
2    9.0
Name: Grade, dtype: float64

## 4. Dropping

In [13]:
df.drop([0, 1])

Unnamed: 0,Name,Country,Grade
2,Hoa,Vietnam,9.0


In [14]:
df.drop('Grade', axis=1)

Unnamed: 0,Name,Country
0,Anh,Brazil
1,Thai,Vietnam
2,Hoa,Vietnam


## 5. Sorting

In [15]:
df.sort_index(axis=1)

Unnamed: 0,Country,Grade,Name
0,Brazil,7.0,Anh
1,Vietnam,8.0,Thai
2,Vietnam,9.0,Hoa


In [16]:
df.sort_values('Name')

Unnamed: 0,Name,Country,Grade
0,Anh,Brazil,7.0
2,Hoa,Vietnam,9.0
1,Thai,Vietnam,8.0


## 6. Applying Functions

In [19]:
f = lambda x: x*2
df['Grade'].apply(f)

0    14.0
1    16.0
2    18.0
Name: Grade, dtype: float64

In [20]:
f = lambda x: x*2
df['Grade'].map(f)

0    14.0
1    16.0
2    18.0
Name: Grade, dtype: float64

In [21]:
f = lambda x: [1, 2]
df.apply(f)

Unnamed: 0,Name,Country,Grade
0,1,1,1
1,2,2,2


## 7. Change Layout

In [5]:
df.rename(columns={'Grade':'Score'})

Unnamed: 0,Name,Country,Score
0,Anh,Brazil,7.0
1,Thai,Vietnam,8.0
2,Hoa,Vietnam,9.0


## 8. Reindexing

In [6]:
df.set_index('Grade')

Unnamed: 0_level_0,Name,Country
Grade,Unnamed: 1_level_1,Unnamed: 2_level_1
7.0,Anh,Brazil
8.0,Thai,Vietnam
9.0,Hoa,Vietnam


In [12]:
df.set_index('Grade').reset_index()

Unnamed: 0,Grade,Name,Country
0,7.0,Anh,Brazil
1,8.0,Thai,Vietnam
2,9.0,Hoa,Vietnam


## 9. Replacing

In [28]:
df.replace({7.0:8.0})

Unnamed: 0,Name,Country,Grade
0,Anh,Brazil,8.0
1,Thai,Vietnam,8.0
2,Hoa,Vietnam,9.0


## 10. Group Data

In [38]:
newdf = df.replace({7.0:8.0})
newdf.groupby(by='Grade').min()

Unnamed: 0_level_0,Name,Country
Grade,Unnamed: 1_level_1,Unnamed: 2_level_1
8.0,Anh,Brazil
9.0,Hoa,Vietnam


## 11. Rolling

In [45]:
df['Grade'].rolling(3)

Rolling [window=3,center=False,axis=0,method=single]

In [46]:
df['Grade'].rolling(3).sum()

0     NaN
1     NaN
2    24.0
Name: Grade, dtype: float64

In [47]:
df['Grade'].rolling(3, center=True)

Rolling [window=3,center=True,axis=0,method=single]

In [48]:
df['Grade'].rolling(3, center=True).sum()

0     NaN
1    24.0
2     NaN
Name: Grade, dtype: float64