## 1. How to import pandas and check the version?

In [2]:
import pandas as pd
print(pd.__version__)

0.23.4


## 2. How to create a series from a list, numpy array and dict?

In [7]:
# Input
import numpy as np
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)  #creates an array of 1 to n
mydict = dict(zip(mylist, myarr))
print(mydict)

{'a': 0, 'b': 1, 'c': 2, 'e': 3, 'd': 4, 'f': 5, 'g': 6, 'h': 7, 'i': 8, 'j': 9, 'k': 10, 'l': 11, 'm': 12, 'n': 13, 'o': 14, 'p': 15, 'q': 16, 'r': 17, 's': 18, 't': 19, 'u': 20, 'v': 21, 'w': 22, 'x': 23, 'y': 24, 'z': 25}


In [12]:
ser1 = pd.Series(mylist)
ser2= pd.Series(myarr)
ser3= pd.Series(mydict)
ser3.head()

a    0
b    1
c    2
e    3
d    4
dtype: int64

## 3. How to convert the index of a series into a column of a dataframe?

In [14]:
ser1.head()

0    a
1    b
2    c
3    e
4    d
dtype: object

In [21]:
ser1.to_frame() #converts series to a data frame
ser1.to_frame().reset_index()

Unnamed: 0,index,0
0,0,a
1,1,b
2,2,c
3,3,e
4,4,d
5,5,f
6,6,g
7,7,h
8,8,i
9,9,j


## 4. How to combine many series to form a dataframe?

In [22]:
import numpy as np
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

In [27]:
newDfRow = pd.DataFrame(
[ser1,ser2],
)
newDfRow

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25
0,a,b,c,e,d,f,g,h,i,j,...,q,r,s,t,u,v,w,x,y,z
1,0,1,2,3,4,5,6,7,8,9,...,16,17,18,19,20,21,22,23,24,25


In [28]:
newDfCol = pd.DataFrame(
{'col 1' : ser1, 'col 2': ser2}
)
newDfCol 

Unnamed: 0,col 1,col 2
0,a,0
1,b,1
2,c,2
3,e,3
4,d,4
5,f,5
6,g,6
7,h,7
8,i,8
9,j,9


## 5. How to assign name to the series’ index?

In [29]:
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))

In [32]:
ser.name = 'alphabets'
ser.head()

0    a
1    b
2    c
3    e
4    d
Name: alphabets, dtype: object

## 6. How to get the items of series A not present in series B?

In [33]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [34]:
ser1[~ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

## 7. How to get the items not common to both series A and series B?

In [39]:
ser_u = pd.Series(np.union1d(ser1, ser2)) #applies to 2 arrays
ser_i = pd.Series(np.intersect1d(ser1,ser2))
ser_u[~ser_u.isin(ser_i)]

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64

## 8. How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?

In [42]:
ser = pd.Series(np.random.normal(10, 5, 25))
ser

0     15.314957
1     10.663101
2      4.783942
3      3.399043
4     10.748832
5      6.823946
6      3.366339
7      4.978629
8     13.223477
9      9.646726
10    10.871690
11    16.174501
12     6.106637
13     0.728478
14    12.700368
15     6.253947
16     8.928153
17     9.089205
18    17.300289
19     6.074603
20    13.942595
21     4.192584
22    14.222960
23    17.148444
24    10.973601
dtype: float64

In [43]:
np.percentile(ser,q=[0,25,50,75,100])

array([ 0.72847765,  6.07460339,  9.64672594, 13.22347721, 17.30028918])

## 9. How to get frequency counts of unique items of a series?

In [46]:
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))

In [51]:
ser.value_counts()

a    6
b    6
f    5
h    4
c    3
g    3
d    2
e    1
dtype: int64

## 10. How to keep only top 2 most frequent values as it is and replace everything else as ‘Other’?

In [64]:
np.random.RandomState(100)
ser = pd.Series(np.random.randint(1, 5, [12]))

In [65]:
ser.value_counts()

2    5
3    4
4    3
dtype: int64

In [66]:
ser.isin(ser.value_counts().index[:2])

0      True
1     False
2      True
3      True
4      True
5     False
6      True
7      True
8     False
9      True
10     True
11     True
dtype: bool

In [67]:
print("Top 2: ", ser.value_counts())
ser[~ser.isin(ser.value_counts().index[:2])] = 'Other'
ser

Top 2:  2    5
3    4
4    3
dtype: int64


0         2
1     Other
2         3
3         2
4         2
5     Other
6         3
7         2
8     Other
9         3
10        2
11        3
dtype: object

## 11. How to bin a numeric series to 10 groups of equal size?

In [69]:
ser = pd.Series(np.random.random(20))
ser

0     0.553726
1     0.510594
2     0.478648
3     0.828189
4     0.326149
5     0.418502
6     0.918178
7     0.228182
8     0.403713
9     0.599127
10    0.845469
11    0.104790
12    0.675836
13    0.356152
14    0.092982
15    0.046031
16    0.211715
17    0.117285
18    0.816427
19    0.958066
dtype: float64

In [70]:
pd.qcut(ser, q=[0, .10, .20, .3, .4, .5, .6, .7, .8, .9, 1], 
        labels=['1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th']).head()

0    7th
1    6th
2    6th
3    9th
4    4th
dtype: category
Categories (10, object): [1st < 2nd < 3rd < 4th ... 7th < 8th < 9th < 10th]

## 12. How to convert a numpy array to a dataframe of given shape? (L1)

In [76]:
ser = pd.Series(np.random.randint(1, 10, 35))
ser.head(5)

0    6
1    5
2    9
3    2
4    7
dtype: int32

In [80]:
ser.values

array([6, 5, 9, 2, 7, 7, 2, 7, 2, 6, 9, 3, 5, 5, 3, 6, 4, 8, 8, 2, 9, 5,
       5, 3, 7, 8, 9, 6, 9, 2, 7, 5, 7, 5, 3])

In [82]:
ser.values.reshape(7,5)

array([[6, 5, 9, 2, 7],
       [7, 2, 7, 2, 6],
       [9, 3, 5, 5, 3],
       [6, 4, 8, 8, 2],
       [9, 5, 5, 3, 7],
       [8, 9, 6, 9, 2],
       [7, 5, 7, 5, 3]])

In [77]:
df = pd.DataFrame(ser.values.reshape(7,5))
df

Unnamed: 0,0,1,2,3,4
0,6,5,9,2,7
1,7,2,7,2,6
2,9,3,5,5,3
3,6,4,8,8,2
4,9,5,5,3,7
5,8,9,6,9,2
6,7,5,7,5,3


## 13. How to find the positions of numbers that are multiples of 3 from a series?

In [90]:
ser = pd.Series(np.random.randint(1, 10, 7))
ser

0    3
1    5
2    9
3    2
4    3
5    6
6    3
dtype: int32

In [91]:
np.argwhere(ser % 3==0)

array([[0],
       [2],
       [4],
       [5],
       [6]], dtype=int64)

### 14. How to extract items at given positions from a series

In [92]:
ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))
pos = [0, 4, 8, 14, 20]

In [93]:
ser.take(pos)

0     a
4     e
8     i
14    o
20    u
dtype: object

### 15. How to stack two series vertically and horizontally ?


In [94]:
ser1 = pd.Series(range(5))
ser2 = pd.Series(list('abcde'))

In [96]:
ser1

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [98]:
ser3 = pd.DataFrame(
    [ser1,ser2])
ser3

Unnamed: 0,0,1,2,3,4
0,0,1,2,3,4
1,a,b,c,d,e


In [100]:
ser4 = ser1.append(ser2)
ser4

0    0
1    1
2    2
3    3
4    4
0    a
1    b
2    c
3    d
4    e
dtype: object

In [102]:
ser5 = pd.concat([ser1,ser2], axis=1)
ser5

Unnamed: 0,0,1
0,0,a
1,1,b
2,2,c
3,3,d
4,4,e
