https://towardsdatascience.com/5-elegant-python-pandas-functions-a4bf395ebef4

In [3]:
import pandas as pd
import numpy as np
# Toy daily series: one row per DATE with a VOLUME and a PRICE.
df = pd.DataFrame(
    {
        'DATE': [1, 2, 3, 4, 5],
        'VOLUME': [100, 200, 300, 400, 500],
        'PRICE': [214, 234, 253, 272, 291],
    }
)
print(df)

# shift(1) moves every row down one position; the vacated first row becomes
# NaN, which is why the integer columns print as floats.
print(df.shift(periods=1))

# Supplying fill_value puts 0 in the vacated row instead of NaN, so the
# columns stay integer-typed.
print(df.shift(periods=1, fill_value=0))


   DATE  VOLUME  PRICE
0     1     100    214
1     2     200    234
2     3     300    253
3     4     400    272
4     5     500    291
   DATE  VOLUME  PRICE
0   NaN     NaN    NaN
1   1.0   100.0  214.0
2   2.0   200.0  234.0
3   3.0   300.0  253.0
4   4.0   400.0  272.0
   DATE  VOLUME  PRICE
0     0       0      0
1     1     100    214
2     2     200    234
3     3     300    253
4     4     400    272


In [4]:
# Previous day's price: PRICE shifted down one row. The first day has no
# predecessor, so it receives the fill value 0.
df['PREV_DAY_PRICE'] = df['PRICE'].shift(periods=1, fill_value=0)
print(df)

   DATE  VOLUME  PRICE  PREV_DAY_PRICE
0     1     100    214               0
1     2     200    234             214
2     3     300    253             234
3     4     400    272             253
4     5     500    291             272


In [6]:
# Average of the three preceding days' prices, built from three lagged copies
# of PRICE (lags 1, 2 and 3).
# NOTE: missing lags are filled with 0 and the sum is always divided by 3, so
# the first three rows are not true 3-day averages (e.g. row 1 is 214 / 3).
lagged_prices = [df['PRICE'].shift(periods=lag, fill_value=0) for lag in (1, 2, 3)]
df['LAST_3_DAYS_AVE_PRICE'] = sum(lagged_prices) / 3
print(df)

   DATE  VOLUME  PRICE  PREV_DAY_PRICE  LAST_3_DAYS_AVE_PRICE
0     1     100    214               0               0.000000
1     2     200    234             214              71.333333
2     3     300    253             234             149.333333
3     4     400    272             253             233.666667
4     5     500    291             272             253.000000


In [8]:
# Next day's price: a negative shift pulls PRICE up one row. The last day has
# no successor, so it receives the fill value 0.
df['TOMORROW_PRICE'] = df['PRICE'].shift(periods=-1, fill_value=0)
print(df)

   DATE  VOLUME  PRICE  PREV_DAY_PRICE  LAST_3_DAYS_AVE_PRICE  TOMORROW_PRICE
0     1     100    214               0               0.000000             234
1     2     200    234             214              71.333333             253
2     3     300    253             234             149.333333             272
3     4     400    272             253             233.666667             291
4     5     500    291             272             253.000000               0


In [9]:
# Numeric index with repeated values and one NaN. value_counts() tallies each
# distinct value, most frequent first; the NaN is left out because dropna
# defaults to True (spelled out here).
a = pd.Index([3, 3, 4, 2, 1, 3, 1, 2, 3, 4, np.nan, 4, 6, 7])
a.value_counts(dropna=True)

3.0    4
4.0    3
2.0    2
1.0    2
6.0    1
7.0    1
Name: count, dtype: int64

In [10]:
# Series mixing strings, integers and two NaNs. Each distinct non-NaN value is
# counted; the NaNs are excluded (dropna defaults to True, spelled out here).
b = pd.Series(
    ['ab', 'bc', 'cd', 1, 'cd', 'cd', 'bc', 'ab', 'bc', 1, 2, 3, 2, 3, np.nan, 1, np.nan]
)
b.value_counts(dropna=True)

bc    3
cd    3
1     3
ab    2
2     2
3     2
Name: count, dtype: int64

In [13]:
# value_counts() also accepts normalize=True (relative frequencies instead of
# raw counts). Here bins=4 is used instead: the values are grouped into four
# equal-width intervals and the members of each interval are counted.
import numpy as np
a = pd.Index([3, 3, 4, 2, 1, 3, 1, 2, 3, 4, 4, 6, 7])
a.value_counts(bins=4)

(2.5, 4.0]      7
(0.993, 2.5]    4
(5.5, 7.0]      2
(4.0, 5.5]      0
Name: count, dtype: int64

In [14]:
# 5x3 frame holding the integers 0..14 in row-major order.
df = pd.DataFrame(np.arange(15).reshape(5, 3), columns=list('ABC'))
print(df)

# mask() replaces the entries where the condition is True: every even element
# is swapped for its negation, odd elements are left untouched.
is_even = (df % 2) == 0
print(df.mask(is_even, other=-df))


    A   B   C
0   0   1   2
1   3   4   5
2   6   7   8
3   9  10  11
4  12  13  14
    A   B   C
0   0   1  -2
1   3  -4   5
2  -6   7  -8
3   9 -10  11
4 -12  13 -14


In [15]:
import pandas as pd
import numpy as np
# Ten rows labelled A..J, each with a HEIGHT and a WEIGHT value.
df = pd.DataFrame(
    {
        'HEIGHT': [170, 78, 99, 160, 160, 130, 155, 70, 70, 20],
        'WEIGHT': [50, 60, 70, 80, 90, 90, 90, 50, 60, 70],
    },
    index=list('ABCDEFGHIJ'),
)
print(df)

# The three rows with the greatest HEIGHT, largest first.
dfl = df.nlargest(n=3, columns='HEIGHT')
print(dfl)


   HEIGHT  WEIGHT
A     170      50
B      78      60
C      99      70
D     160      80
E     160      90
F     130      90
G     155      90
H      70      50
I      70      60
J      20      70
   HEIGHT  WEIGHT
A     170      50
D     160      80
E     160      90


In [16]:
# keep='all' retains every row tied at the cut-off value, so asking for the
# top 2 returns 3 rows here (D and E share HEIGHT 160).
dfl = df.nlargest(n=2, columns='HEIGHT', keep='all')
print(dfl)

   HEIGHT  WEIGHT
A     170      50
D     160      80
E     160      90


In [17]:
# keep='last' breaks the tie at HEIGHT 160 in favour of the later row (E).
dfl = df.nlargest(n=2, columns='HEIGHT', keep='last')
print(dfl)

   HEIGHT  WEIGHT
A     170      50
E     160      90


In [18]:
# keep='first' breaks the tie at HEIGHT 160 in favour of the earlier row (D).
dfl = df.nlargest(n=2, columns='HEIGHT', keep='first')
print(dfl)

   HEIGHT  WEIGHT
A     170      50
D     160      80


In [19]:
import pandas as pd
import numpy as np
# Ten rows labelled A..J, each with a HEIGHT and a WEIGHT value.
df = pd.DataFrame(
    {
        'HEIGHT': [170, 78, 99, 160, 160, 130, 155, 70, 70, 20],
        'WEIGHT': [50, 60, 70, 80, 90, 90, 90, 50, 60, 70],
    },
    index=list('ABCDEFGHIJ'),
)
print(df)

# The three rows with the smallest WEIGHT, smallest first.
dfs = df.nsmallest(n=3, columns='WEIGHT')
print(dfs)

   HEIGHT  WEIGHT
A     170      50
B      78      60
C      99      70
D     160      80
E     160      90
F     130      90
G     155      90
H      70      50
I      70      60
J      20      70
   HEIGHT  WEIGHT
A     170      50
H      70      50
B      78      60
