## Get the pandas version?

In [2]:
import numpy as np  # optional
import pandas as pd

# print(pd.__version__)
# print(pd.show_versions(as_json=True))

## create a series from a list, numpy array and dict?

In [19]:
# from list
mylist = list('ABCDE')

In [26]:
# Solution
pd.Series(mylist)

0    A
1    B
2    C
3    D
4    E
dtype: object

In [21]:
# from arr
myarr = np.arange(5)

In [22]:
# Solution
pd.Series(myarr)

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [24]:
# from dict
mydict = dict(zip(myarr, mylist))
mydict

{0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E'}

In [25]:
# Solution
pd.Series(mydict)

0    A
1    B
2    C
3    D
4    E
dtype: object

## convert the index of a series into a column of a dataframe?

Convert the series $ser$ into a dataframe with its index as another column on the dataframe.

In [31]:
# from list
mylist1 = list('ABCDE')
mylist2 = list('abcde')

In [32]:
mydict = dict(zip(mylist1, mylist2))

In [34]:
ser = pd.Series(mydict)
ser

A    a
B    b
C    c
D    d
E    e
dtype: object

In [46]:
# solution
print(pd.DataFrame(ser, columns = ['value']))
print(pd.DataFrame(ser, columns = ['value']).reset_index())

  value
A     a
B     b
C     c
D     d
E     e
  index value
0     A     a
1     B     b
2     C     c
3     D     d
4     E     e


## How to combine many series to form a dataframe?

In [47]:
ser1 = pd.Series(list('abcdef'))
ser2 = pd.Series(np.arange(6))

In [48]:
# solution1 - using pd.DataFrame
pd.DataFrame({'col1': ser1, 'col2': ser2})

Unnamed: 0,col1,col2
0,a,0
1,b,1
2,c,2
3,d,3
4,e,4
5,f,5


In [50]:
# solution2 - using pd.concat
pd.concat([ser1, ser2], axis=1)

Unnamed: 0,0,1
0,a,0
1,b,1
2,c,2
3,d,3
4,e,4
5,f,5


## assign name to the series’ index

In [51]:
pd.Series([1, 2, 3, 4, 5], name = 'Price')

0    1
1    2
2    3
3    4
4    5
Name: Price, dtype: int64

## get the items of series A not present in series B?

In [52]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [55]:
ser1[~ser1.isin(ser2)]

0    1
1    2
2    3
dtype: int64

## get the minimum, 25th percentile, median, 75th, and max of a numeric series?

In [56]:
ser = pd.Series(np.random.normal(10, 5, 25))

In [57]:
np.percentile(ser, q=[0, 25, 50, 75, 100])

array([ 1.87355022,  8.34528211, 10.03686855, 13.50454725, 21.46892701])

## get frequency counts of unique items of a series?

In [60]:
mylist = list('abcdefgh')

ser = pd.Series(np.take(mylist, np.random.randint(len(mylist), size=30)))
ser

0     d
1     e
2     e
3     c
4     c
5     e
6     h
7     a
8     c
9     h
10    g
11    d
12    e
13    a
14    c
15    h
16    f
17    b
18    a
19    f
20    b
21    f
22    d
23    a
24    b
25    d
26    d
27    f
28    d
29    e
dtype: object

In [61]:
# Solution
ser.value_counts()

d    6
e    5
a    4
f    4
c    4
b    3
h    3
g    1
dtype: int64

## keep only top 2 most frequent values as it is and replace everything else as ‘Other’?

In [68]:
mylist = list('abcdefgh')

ser = pd.Series(np.take(mylist, np.random.randint(len(mylist), size=30)))

In [71]:
ser.value_counts()

b    8
c    5
h    4
f    4
a    3
e    3
g    3
dtype: int64

In [73]:
# solution
print("Top 2 Freq:\n", ser.value_counts())

Top 2 Freq:
 b    8
c    5
h    4
f    4
a    3
e    3
g    3
dtype: int64
