In [1]:
import pandas as pd

# Headers

Always use them to stay organized! You should use Markdown cells for headers and text. In command mode (press Esc first), the hotkey for turning a cell into Markdown is "m". To turn the cell back into code, you can press "y".

# View class/function info in notebook

Sometimes we have a function we want to invoke, but we forget exactly what arguments to pass. Rather than looking up documentation, we can just append a "?" and run the cell.

This gives us the function call signature and a docstring.

In [12]:
pd.DataFrame?

If we want more detail, we can directly inspect the source code by appending "??".

In [13]:
pd.DataFrame??

# Dir: inspect attributes and methods

Sometimes you have a Python object and you want to know what attributes and methods it has. Rather than looking up the documentation online, try running "dir()" on the object.

In [2]:
# Toy frame to inspect in the examples that follow: one row per character.
df = pd.DataFrame(
    data=dict(
        name=["shrek", "donkey", "fiona"],
        score=[999, 47, 33],
    )
)
df

Unnamed: 0,name,score
0,shrek,999
1,donkey,47
2,fiona,33


In [3]:
# List every attribute and method name on df, dunders and private names included
dir(df)

['T',
 '_AXIS_LEN',
 '_AXIS_ORDERS',
 '_AXIS_TO_AXIS_NUMBER',
 '_HANDLED_TYPES',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_ufunc__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdivmod__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 

But dir can include a lot of dunder methods (like \_\_init\_\_) and private, underscore-prefixed names that we don't really care about, so we can define a small helper to filter these out.

In [8]:
import regex as re

In [9]:
def dirg(obj):
    """Return the public attribute and method names of ``obj``.

    A filtered version of ``dir(obj)``: every name that starts with an
    underscore is dropped, which covers both dunders (``__init__``) and
    private members (``_AXIS_LEN``).

    Parameters
    ----------
    obj : object
        Any Python object.

    Returns
    -------
    list of str
        The names from ``dir(obj)`` that do not start with ``_``, in the
        same order ``dir`` returned them.
    """
    # A single prefix check handles dunders and single-underscore names alike,
    # including ones such as ``_`` or ``_1x`` that a "_ followed by a letter"
    # pattern would let through. It also needs no regex module at all.
    return [name for name in dir(obj) if not name.startswith("_")]

In [11]:
# much better: the dunder and underscore-prefixed names are gone
dirg(df)

['T',
 'abs',
 'add',
 'add_prefix',
 'add_suffix',
 'agg',
 'aggregate',
 'align',
 'all',
 'any',
 'append',
 'apply',
 'applymap',
 'asfreq',
 'asof',
 'assign',
 'astype',
 'at',
 'at_time',
 'attrs',
 'axes',
 'backfill',
 'between_time',
 'bfill',
 'bool',
 'boxplot',
 'clip',
 'columns',
 'combine',
 'combine_first',
 'compare',
 'convert_dtypes',
 'copy',
 'corr',
 'corrwith',
 'count',
 'cov',
 'cummax',
 'cummin',
 'cumprod',
 'cumsum',
 'describe',
 'diff',
 'div',
 'divide',
 'dot',
 'drop',
 'drop_duplicates',
 'droplevel',
 'dropna',
 'dtypes',
 'duplicated',
 'empty',
 'eq',
 'equals',
 'eval',
 'ewm',
 'expanding',
 'explode',
 'ffill',
 'fillna',
 'filter',
 'first',
 'first_valid_index',
 'flags',
 'floordiv',
 'from_dict',
 'from_records',
 'ge',
 'get',
 'groupby',
 'gt',
 'head',
 'hist',
 'iat',
 'idxmax',
 'idxmin',
 'iloc',
 'index',
 'infer_objects',
 'info',
 'insert',
 'interpolate',
 'isin',
 'isna',
 'isnull',
 'items',
 'iteritems',
 'iterrows',
 'itertupl