# pandas 101 1- 10 

## 1. How to import pandas and check the version?

In [1]:
import pandas as pd
print(pd.__version__)

0.24.2


## 2. How to create a series from a list, numpy array and dict?

In [2]:
# input 
import numpy as np
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))

mydict 

{'a': 0,
 'b': 1,
 'c': 2,
 'e': 3,
 'd': 4,
 'f': 5,
 'g': 6,
 'h': 7,
 'i': 8,
 'j': 9,
 'k': 10,
 'l': 11,
 'm': 12,
 'n': 13,
 'o': 14,
 'p': 15,
 'q': 16,
 'r': 17,
 's': 18,
 't': 19,
 'u': 20,
 'v': 21,
 'w': 22,
 'x': 23,
 'y': 24,
 'z': 25}

In [5]:
# solution:
ser1=pd.Series(mylist)

ser1

0     a
1     b
2     c
3     e
4     d
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
dtype: object

In [6]:
# solution 
ser2= pd.Series(myarr)
ser2

0      0
1      1
2      2
3      3
4      4
5      5
6      6
7      7
8      8
9      9
10    10
11    11
12    12
13    13
14    14
15    15
16    16
17    17
18    18
19    19
20    20
21    21
22    22
23    23
24    24
25    25
dtype: int32

In [8]:
# solution 
ser3 = pd.Series(mydict)
ser3

a     0
b     1
c     2
e     3
d     4
f     5
g     6
h     7
i     8
j     9
k    10
l    11
m    12
n    13
o    14
p    15
q    16
r    17
s    18
t    19
u    20
v    21
w    22
x    23
y    24
z    25
dtype: int64

## 3. How to convert the index of a series into a column of a dataframe?

In [9]:
# input 
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
ser = pd.Series(mydict)
ser.head()

a    0
b    1
c    2
e    3
d    4
dtype: int64

In [10]:
# solution 
df = ser.to_frame().reset_index()

df.head()

Unnamed: 0,index,0
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4


In [11]:
df.set_index('index')

Unnamed: 0_level_0,0
index,Unnamed: 1_level_1
a,0
b,1
c,2
e,3
d,4
f,5
g,6
h,7
i,8
j,9


In [12]:
df.head()

Unnamed: 0,index,0
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4


In [13]:
df.set_index('index', inplace=True)
df

Unnamed: 0_level_0,0
index,Unnamed: 1_level_1
a,0
b,1
c,2
e,3
d,4
f,5
g,6
h,7
i,8
j,9


## 4. How to combine many series to form a dataframe?

In [16]:
## input 
import numpy as np
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

In [17]:
# solution 1 
df = pd.concat([ser1, ser2], axis=1)
df

Unnamed: 0,0,1
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4
5,f,5
6,g,6
7,h,7
8,i,8
9,j,9


In [18]:
# solution 2 
df = pd.DataFrame({'col':ser1, 'col2':ser2})

df.head()

Unnamed: 0,col,col2
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4


## 5. How to assign name to the series’ index?

In [20]:
# input 
# Give a name to the series ser calling it ‘alphabets’.
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser

0     a
1     b
2     c
3     e
4     d
5     f
6     g
7     h
8     i
9     j
10    k
11    l
12    m
13    n
14    o
15    p
16    q
17    r
18    s
19    t
20    u
21    v
22    w
23    x
24    y
25    z
dtype: object

In [21]:
#solution 
ser.name='alphabets'

ser.head()

0    a
1    b
2    c
3    e
4    d
Name: alphabets, dtype: object

## 6. How to get the items of series A not present in series B?

In [22]:
# From ser1 remove items present in ser2.
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [23]:
# solution
ser1.isin(ser2)

0    False
1    False
2    False
3     True
4     True
dtype: bool

In [24]:
-ser1.isin(ser2)

0     True
1     True
2     True
3    False
4    False
dtype: bool

In [25]:
ser1[-ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

## 7. How to get the items not common to both series A and series B?

In [None]:
# input 
# Get all items of ser1 and ser2 not common to both.

ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [26]:
# solution 
ser_u = pd.Series(np.union1d(ser1, ser2))
ser_i = pd.Series(np.intersect1d(ser1, ser2))

ser_u

0    1
1    2
2    3
3    4
4    5
5    6
6    7
7    8
dtype: int64

In [27]:
ser_i

0    4
1    5
dtype: int64

In [34]:
ser_u.isin(ser_i)

0    False
1    False
2    False
3     True
4     True
5    False
6    False
7    False
dtype: bool

In [35]:
- ser_u.isin(ser_i)

0     True
1     True
2     True
3    False
4    False
5     True
6     True
7     True
dtype: bool

In [36]:
ser_u[-ser_u.isin(ser_i)]

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64

## 8. How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?

In [39]:
# Compute the minimum, 25th percentile, median, 75th, and maximum of ser.
# Input
state = np.random.RandomState(100)
ser = pd.Series(state.normal(10, 5, 25))

ser

0      1.251173
1     11.713402
2     15.765179
3      8.737820
4     14.906604
5     12.571094
6     11.105898
7      4.649783
8      9.052521
9     11.275007
10     7.709865
11    12.175817
12     7.082025
13    14.084235
14    13.363604
15     9.477944
16     7.343598
17    15.148663
18     7.809322
19     4.408409
20    18.094908
21    17.708026
22     8.740604
23     5.787821
24    10.922593
dtype: float64

In [40]:
# Solution
np.percentile(ser, q=[0, 25, 50, 75, 100])

array([ 1.25117263,  7.70986507, 10.92259345, 13.36360403, 18.0949083 ])

## 9. How to get frequency counts of unique items of a series?

In [42]:
# Input
# Calculte the frequency counts of each unique value ser.
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))

ser

0     f
1     c
2     g
3     c
4     c
5     c
6     a
7     a
8     f
9     d
10    f
11    d
12    e
13    a
14    d
15    g
16    h
17    e
18    b
19    a
20    d
21    a
22    c
23    h
24    b
25    e
26    d
27    e
28    g
29    d
dtype: object

In [43]:
# take 
a = [4, 3, 5, 7, 6, 8]
indices = [0, 1, 4]
np.take(a, indices)

array([4, 3, 6])

In [44]:
# solution 
ser.value_counts()

d    6
c    5
a    5
e    4
g    3
f    3
b    2
h    2
dtype: int64

## 10. How to keep only top 2 most frequent values as it is and replace everything else as ‘Other’?

From ser, keep the top 2 most frequent items as it is and replace everything else as ‘Other’.

In [46]:
# Input
np.random.RandomState(100)
ser = pd.Series(np.random.randint(1, 5, [12]))
ser

0     3
1     1
2     3
3     1
4     4
5     4
6     3
7     2
8     3
9     4
10    3
11    2
dtype: int32

In [49]:
ser_count=ser.value_counts()

ser_count

3    5
4    3
2    2
1    2
dtype: int64

In [51]:
top2=ser_count[:2]
top2

3    5
4    3
dtype: int64

In [53]:
ser.isin(top2)

0      True
1     False
2      True
3     False
4     False
5     False
6      True
7     False
8      True
9     False
10     True
11    False
dtype: bool

In [56]:
ser[-ser.isin(top2)]='other'

ser

0         3
1     other
2         3
3     other
4     other
5     other
6         3
7     other
8         3
9     other
10        3
11    other
dtype: object