# Python Pandas: Tricks & Features You May Not Know
by Brad Solomon. 
[Tutorial source.](https://realpython.com/python-pandas-tricks/)

- Author: Israel Oliveira [\[e-mail\]](mailto:prof.israel@gmail.com)

In [1]:
%load_ext watermark

In [2]:
import pandas as pd

In [3]:
# Run this cell before closing the notebook: it records the date, package versions, machine and Git details.
%watermark -d --iversion -b -r -g -m -v

pandas 1.0.5
2020-07-14 

CPython 3.7.7
IPython 7.15.0

compiler   : GCC 8.3.0
system     : Linux
release    : 4.19.76-linuxkit
machine    : x86_64
processor  : 
CPU cores  : 16
interpreter: 64bit
Git hash   : de011420f6f873f7700cf3725a82248aff32f43e
Git repo   : https://github.com/ysraell/examples.git
Git branch : master


In [4]:
# Nine consecutive calendar quarter-end dates starting in 2017, held in a
# Series with the default integer index.
# An explicit offset object replaces the 'Q' string alias: pandas 2.2
# deprecated that alias (renamed 'QE'), whereas QuarterEnd is accepted by both
# older and newer pandas releases and yields the same dates.
daterng = pd.Series(
    pd.date_range('2017', periods=9, freq=pd.offsets.QuarterEnd(startingMonth=12))
)

In [5]:
# Display the Series of quarter-end dates.
daterng

0   2017-03-31
1   2017-06-30
2   2017-09-30
3   2017-12-31
4   2018-03-31
5   2018-06-30
6   2018-09-30
7   2018-12-31
8   2019-03-31
dtype: datetime64[ns]

In [6]:
# Weekday name of each date, obtained through the Series `.dt` accessor.
daterng.dt.day_name()

0      Friday
1      Friday
2    Saturday
3      Sunday
4    Saturday
5    Saturday
6      Sunday
7      Monday
8      Sunday
dtype: object

In [7]:
# Keep only the dates whose quarter number is greater than 2 (Q3 and Q4).
daterng.loc[daterng.dt.quarter.gt(2)]

2   2017-09-30
3   2017-12-31
6   2018-09-30
7   2018-12-31
dtype: datetime64[ns]

In [8]:
# Boolean-mask selection: keep the dates flagged as the last day of a year.
daterng.loc[daterng.dt.is_year_end]

3   2017-12-31
7   2018-12-31
dtype: datetime64[ns]

In [10]:
# Inspect which class implements the `.dt` accessor for this Series.
type(daterng.dt)

pandas.core.indexes.accessors.DatetimeProperties

In [12]:
# Look the accessor class up by its dotted module path.
# NOTE(review): pandas documents `pandas.core` as private API — fine for
# exploration, but confirm before relying on this path in real code.
pd.core.indexes.accessors.DatetimeProperties

pandas.core.indexes.accessors.DatetimeProperties

In [16]:
from itertools import product
import numpy as np

In [17]:
# One row per (year, month, day) combination: 2 years x 2 months x 3 days = 12 rows.
datecols = ['year', 'month', 'day']
df = pd.DataFrame(list(product([2017, 2016], [1, 2], [1, 2, 3])), columns=datecols)
# Draw the 'data' column from a seeded generator so that Restart & Run All
# reproduces the same standard-normal values; the unseeded np.random.randn
# produced different numbers on every run.  (default_rng needs NumPy >= 1.17.)
df['data'] = np.random.default_rng(42).standard_normal(len(df))
df

Unnamed: 0,year,month,day,data
0,2017,1,1,0.960279
1,2017,1,2,0.972377
2,2017,1,3,-2.155526
3,2017,2,1,1.229687
4,2017,2,2,1.689815
5,2017,2,3,-1.357509
6,2016,1,1,-0.117566
7,2016,1,2,-1.440941
8,2016,1,3,-1.749074
9,2016,2,1,-0.096534


In [18]:
# Assemble timestamps from the year/month/day columns and install them as the
# row labels of `df`; the three source columns stay in the frame.
df.index = pd.to_datetime(df.loc[:, datecols])
df

Unnamed: 0,year,month,day,data
2017-01-01,2017,1,1,0.960279
2017-01-02,2017,1,2,0.972377
2017-01-03,2017,1,3,-2.155526
2017-02-01,2017,2,1,1.229687
2017-02-02,2017,2,2,1.689815
2017-02-03,2017,2,3,-1.357509
2016-01-01,2016,1,1,-0.117566
2016-01-02,2016,1,2,-1.440941
2016-01-03,2016,1,3,-1.749074
2016-02-01,2016,2,1,-0.096534


In [21]:
# Remove the year/month/day columns; squeeze() then collapses the remaining
# single-column frame into a Series.
df.drop(columns=datecols).squeeze()

2017-01-01    0.960279
2017-01-02    0.972377
2017-01-03   -2.155526
2017-02-01    1.229687
2017-02-02    1.689815
2017-02-03   -1.357509
2016-01-01   -0.117566
2016-01-02   -1.440941
2016-01-03   -1.749074
2016-02-01   -0.096534
2016-02-02   -0.771939
2016-02-03   -2.616831
Name: data, dtype: float64

In [23]:
import sys

# Ten colour labels containing only five distinct values.
colors = pd.Series([
    'periwinkle',
    'mint green',
    'burnt orange',
    'periwinkle',
    'burnt orange',
    'rose',
    'rose',
    'mint green',
    'rose',
    'navy',
])

# Size in bytes that sys.getsizeof reports for each string element.
colors.apply(sys.getsizeof)


0    59
1    59
2    61
3    59
4    61
5    53
6    53
7    59
8    53
9    53
dtype: int64

In [25]:
# Map each distinct colour to an integer code, numbered in the order that
# colors.unique() returns the labels.
mapper = {color: code for code, color in enumerate(colors.unique())}
mapper

{'periwinkle': 0, 'mint green': 1, 'burnt orange': 2, 'rose': 3, 'navy': 4}

In [26]:
# Replace every colour label with the integer code it has in `mapper`.
as_int = colors.map(mapper)
as_int

0    0
1    1
2    2
3    0
4    2
5    3
6    3
7    1
8    3
9    4
dtype: int64

In [27]:
# Size in bytes that sys.getsizeof reports for each integer code.
as_int.apply(sys.getsizeof)

0    24
1    28
2    28
3    24
4    28
5    28
6    28
7    28
8    28
9    28
dtype: int64

In [29]:
# Encode the labels in one call: returns the integer codes together with the
# distinct labels they refer to.
pd.factorize(colors)

(array([0, 1, 2, 0, 2, 3, 3, 1, 3, 4]),
 Index(['periwinkle', 'mint green', 'burnt orange', 'rose', 'navy'], dtype='object'))

In [31]:
# Display the current state of `df`.
df

Unnamed: 0,year,month,day,data
2017-01-01,2017,1,1,0.960279
2017-01-02,2017,1,2,0.972377
2017-01-03,2017,1,3,-2.155526
2017-02-01,2017,2,1,1.229687
2017-02-02,2017,2,2,1.689815
2017-02-03,2017,2,3,-1.357509
2016-01-01,2016,1,1,-0.117566
2016-01-02,2016,1,2,-1.440941
2016-01-03,2016,1,3,-1.749074
2016-02-01,2016,2,1,-0.096534
