In [1]:
import pandas as pd
import numpy as np

In [2]:
# How to create a series from a list, numpy array and dict? 
myList = [10, 20, 30, 40, 50]
listAsSeries = pd.Series(myList)
myArray = np.array([1, 2, 3, 4, 5])
arrayAsSeries = pd.Series(myArray)
myDict = {'name': 'Yahya', 'age': 20, 'city': 'Amsterdam'}
dictAsSeries = pd.Series(myDict)
print(listAsSeries)
print(arrayAsSeries)
print(dictAsSeries)

0    10
1    20
2    30
3    40
4    50
dtype: int64
0    1
1    2
2    3
3    4
4    5
dtype: int64
name        Yahya
age            20
city    Amsterdam
dtype: object


In [3]:
# How to combine many series to form a dataframe?
df = pd.concat([listAsSeries, arrayAsSeries, dictAsSeries], axis=1)
print(df)

         0    1          2
0     10.0  1.0        NaN
1     20.0  2.0        NaN
2     30.0  3.0        NaN
3     40.0  4.0        NaN
4     50.0  5.0        NaN
name   NaN  NaN      Yahya
age    NaN  NaN         20
city   NaN  NaN  Amsterdam


In [5]:
# How to get the items of series A not present in series B?
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])
result = ser1[~ser1.isin(ser2)]
print(result)

0    1
1    2
2    3
dtype: int64


In [7]:
# How to get the items not common to both series A and series B?
result = pd.concat([ser1, ser2]).drop_duplicates(keep=False)
print(result)

0    1
1    2
2    3
2    6
3    7
4    8
dtype: int64


In [8]:
# How to get frequency counts of unique items of a series?
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))
frequencyCounts = ser.value_counts()
print(frequencyCounts)

e    5
f    5
h    4
d    4
g    4
a    3
c    3
b    2
Name: count, dtype: int64


In [9]:
# How to convert a numpy array to a dataframe of given shape? (L1)
# Reshape the series ser into a dataframe with 7 rows and 5 columns
ser = pd.Series(np.random.randint(1, 10, 35))
reshaped = ser.values.reshape(7, 5)
df = pd.DataFrame(reshaped)
print(df)

   0  1  2  3  4
0  4  8  6  2  8
1  2  1  9  2  2
2  4  2  3  6  8
3  5  2  6  5  5
4  9  8  3  4  8
5  3  1  7  2  7
6  3  5  5  6  6


In [12]:
# How to find the positions of numbers that are multiples of 3 from a series?
np.random.RandomState(100)
ser = pd.Series(np.random.randint(1, 5, 10))
multiples = ser % 3 == 0
positions = np.where(multiples) [0]
print(positions)

[6]


In [14]:
# How to extract items at given positions from a series
ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))
pos = [0, 4, 8, 14, 20]
result = ser[pos]
print(result)

0     a
4     e
8     i
14    o
20    u
dtype: object


In [15]:
# How to stack two series vertically and horizontally ?
ser1 = pd.Series(range(5))
ser2 = pd.Series(list('abcde'))
stacked = pd.concat([ser1, ser2], axis=0)
print(stacked)

0    0
1    1
2    2
3    3
4    4
0    a
1    b
2    c
3    d
4    e
dtype: object


In [16]:
# How to get the positions of items of series A in another series B?
ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series([1, 3, 10, 13])
pos = ser1.isin(ser2)
positions = np.where(pos)[0]
print(positions)

[0 4 5 8]


In [17]:
# How to compute difference of differences between consequtive numbers of a series?
ser = pd.Series([1, 3, 6, 10, 15, 21, 27, 35])
diff = ser.diff()
diffOfDiff = diff.diff()
print(diffOfDiff)

0    NaN
1    NaN
2    1.0
3    1.0
4    1.0
5    1.0
6    0.0
7    2.0
dtype: float64


In [18]:
# How to replace missing spaces in a string with the least frequent character?
myStr = 'dbc deb abed ggade'
ser = pd.Series(list(myStr))
counts = ser.value_counts()
leastFrequent = counts.drop(' ').idxmin()
result = ser.replace(' ', leastFrequent).str.cat(sep='')
print(result)

dbccdebcabedcggade


In [None]:
#  How to check if a dataframe has any missing values?
df.isnull().values.any()
print()