The name pandas is derived from "panel data" and is also a play on "Python data analysis". The library provides a rich set of features for exploring and manipulating data, making it the go-to toolkit for many data scientists.

In [1]:
# NumPy supplies np.nan, which the cells below use as the missing-value marker.
import numpy as np
import pandas as pd
# Print the installed pandas version so readers can see which release is in use.
print(pd.__version__)

1.1.5


Creating some pandas Series and adding them together...

In [2]:
# Two Series with identical values; only the order of the first two index
# labels ("a" and "b") differs between them.
ser_a = pd.Series(data=[1, 2, 3, 4], index=list("abcd"))
ser_b = pd.Series(data=[1, 2, 3, 4], index=list("bacd"))

# Addition pairs values by index label rather than by position, so "a" is
# 1 + 2 and "b" is 2 + 1.
ser_a + ser_b

a    3
b    3
c    6
d    8
dtype: int64

... doing some element-wise operations

In [3]:
# Method spellings of +, -, * and /; each one pairs values by index label.
# Only the final expression (the division) is rendered as the cell output.
ser_a.add(ser_b)
ser_a.sub(ser_b)
ser_a.mul(ser_b)
ser_a.div(ser_b)

a    0.5
b    2.0
c    1.0
d    1.0
dtype: float64

... doing some aggregate operations and checking dtypes

In [4]:
# One missing value (NaN) is enough for the whole Series to be stored as floats.
ser_c = pd.Series(data=[1, np.nan, 3, 4], index=list("abcd"))
print(ser_c.dtype)               # float64

# Aggregations ignore NaN by default. None of these results are displayed
# (only printed values and the cell's final expression are), so the expected
# value is noted next to each call.
ser_c.count()                    # 3 (non-missing values only)
ser_c.sum()                      # 8.0
ser_c.mean()                     # ~2.67 (8 / 3)
ser_c.mean(skipna=False)         # nan, because the NaN is no longer skipped
ser_c.max()                      # 4.0
ser_c.min()                      # 1.0
ser_c.idxmax()                   # "d", the label of the largest value

# Mixing integers with a string leaves pandas with the generic object dtype.
ser_d = pd.Series(data=[1, "a", 3, 4], index=list("abcd"))
print(ser_d.dtype)               # object

# No index is given, so the labels are the positions 0..5.
ser_e = pd.Series(data=[1, 1, 1, np.nan, 3, 4])
ser_e

float64
object


0    1.0
1    1.0
2    1.0
3    NaN
4    3.0
5    4.0
dtype: float64

Checking unique values

In [5]:
# unique() keeps NaN as its own entry, whereas nunique() and value_counts()
# leave it out by default (dropna=True is spelled out here to make that visible).
ser_e.unique()                    # array([ 1., nan,  3.,  4.])
ser_e.nunique(dropna=True)        # 3
ser_e.value_counts(dropna=True)

1.0    3
4.0    1
3.0    1
dtype: int64

Checking null values

In [6]:
# Boolean mask: True where the value is missing (NaN).
ser_e.isna()

0    False
1    False
2    False
3     True
4    False
5    False
dtype: bool

In [7]:
# Boolean mask: True where a value is present (not NaN).
ser_e.notna()

0     True
1     True
2     True
3    False
4     True
5     True
dtype: bool

In [8]:
# Returns a new Series without the NaN entry; the remaining rows keep their
# original index labels (note the gap at 3). ser_e itself is not modified.
ser_e.dropna()

0    1.0
1    1.0
2    1.0
4    3.0
5    4.0
dtype: float64

In [9]:
# Fill the missing entry with the mean of the observed values
# (mean() skips NaN by default).
mean_of_observed = ser_e.mean()
ser_e.fillna(value=mean_of_observed)

0    1.0
1    1.0
2    1.0
3    2.0
4    3.0
5    4.0
dtype: float64

In [10]:
# Forward fill: each missing entry takes the last valid value before it.
# Series.ffill() replaces fillna(method="ffill"), whose `method` argument is
# deprecated in newer pandas releases; the result is the same.
ser_e.ffill()

0    1.0
1    1.0
2    1.0
3    1.0
4    3.0
5    4.0
dtype: float64

In [11]:
# Backward fill: each missing entry takes the next valid value after it.
# Series.bfill() replaces fillna(method="bfill"), whose `method` argument is
# deprecated in newer pandas releases; the result is the same.
ser_e.bfill()

0    1.0
1    1.0
2    1.0
3    3.0
4    3.0
5    4.0
dtype: float64