### Docstrings

In [1]:
def split_and_stack(df, new_names):
    """
    Split a DataFrame's columns into two halves and stack them vertically.

    The left half of the columns (by position) is placed on top of the right half,
    and the result is returned as a new DataFrame with 'new_names' as the column names.

    Args:
        df (DataFrame): The DataFrame to split; it must have an even number of columns
        new_names (iterable of str): the column names for the new DataFrame

    Returns:
        DataFrame: the stacked data, with half the columns and twice the rows of 'df'

    Raises:
        ValueError: if 'df' has an odd number of columns, so the halves cannot be stacked
    """
    n_cols = len(df.columns)
    if n_cols % 2:
        # Fail early with a readable message instead of numpy's shape-mismatch error
        raise ValueError(
            'df must have an even number of columns, got {}'.format(n_cols)
        )
    half = n_cols // 2
    left = df.iloc[:, :half]
    right = df.iloc[:, half:]
    return pd.DataFrame(data=np.vstack([left.values, right.values]), columns=new_names)

In [None]:
def func(arg):
    """
    Description of what the function does

    Args:
        arg (type, optional): description of arg. Write optional when an argument has a default value

    Returns:
        type: description of return value

    Raises:
        ValueError: include any error types that the function intentionally raises

    Notes:
        See https://google.github.io/styleguide/pyguide.html for more info
    """
    # The return must be indented into the function body; at column 0 it is a SyntaxError
    return arg

### Don't repeat yourself (DRY)

In [None]:
def load_and_plot(path):
    """
    Load a data set and plot the first two principal components

    The scatter plot is drawn as a side effect through plt.scatter.

    Args:
        path(str): location of the CSV file; it must contain a 'label' column

    Returns:
        tuple of ndarray: (features, labels)
    """
    data = pd.read_csv(path)
    y = data['label'].values
    # Every column except 'label' is a feature. The column list has to be built from
    # the frame that was just loaded and wrapped in brackets to be a valid selection.
    feature_cols = [col for col in data.columns if col != 'label']
    X = data[feature_cols].values
    pca = PCA(n_components=2).fit_transform(X)
    plt.scatter(pca[:, 0], pca[:, 1])
    return X, y

In [None]:
train_X, train_y = load_and_plot('train.csv')

In [None]:
val_X, val_y = load_and_plot('validation.csv')

In [None]:
test_X, test_y = load_and_plot('test.csv')

### Do one thing

In [None]:
def load_data(path):
    """
    Load a data set and separate the features from the labels

    Args:
        path(str): location of the CSV file; it must contain a 'label' column

    Returns:
        tuple of ndarray: (features, labels)
    """
    data = pd.read_csv(path)
    y = data['label'].values
    # Every column except 'label' is a feature. The column list has to be built from
    # the frame that was just loaded and wrapped in brackets to be a valid selection.
    feature_cols = [col for col in data.columns if col != 'label']
    X = data[feature_cols].values
    return X, y

In [None]:
def plot_data(X):
    """
    Scatter-plot the data projected onto its first two principal components

    Args:
        X(numpy.ndarray): The data to plot
    """
    projected = PCA(n_components=2).fit_transform(X)
    # One axis per principal component
    first_pc, second_pc = projected[:, 0], projected[:, 1]
    plt.scatter(first_pc, second_pc)

### Pass by assignment

In [4]:
def foo(x):
    """
    Overwrite the first element of a list with 99, in place

    Args:
        x(list): list to change; the object passed in is itself modified
    """
    replacement = 99
    x[0] = replacement

my_list = [1,2,3]

# foo receives this very list object, so the item assignment made inside it is visible here
foo(my_list)
print(my_list)

[99, 2, 3]


In [7]:
def bar(x): #on entry, x refers to the same object as the caller's argument
    """
    Compute x + 90 and bind the result to the local name only

    Nothing is returned, so the sum is discarded when the function exits.

    Args:
        x(int): value to add 90 to
    """
    x = x + 90 #rebinds the local name x to a new object; the caller's variable is untouched

my_var = 3
bar(my_var)
print(my_var) #my_var still refers to the original object, so this prints 3

3


In [8]:
# Reference lists: type names grouped by whether their instances can be changed in place
# NOTE(review): the last entry of `immutable` is the None object, not a string like the other entries - confirm this is intended
immutable = ['int', 'float', 'bool','string','bytes', 'tuple', 'frozenset', None]
mutable = ['list', 'dict','set', 'bytearray','objects', 'functions']

### Dangerous default

In [9]:
def foo(var=[]):
    """
    Append 1 to a list (deliberate demonstration of the mutable-default pitfall)

    The default list is created once, when the function is defined, and the same
    object is reused by every call that omits 'var', so appended values accumulate
    from one call to the next.

    Args:
        var(list,optional): list to append to; modified in place

    Returns:
        list: the same list object that was passed in (or the shared default), with 1 appended
    """
    var.append(1)
    return var

In [10]:
foo()

[1]

In [11]:
foo()

[1, 1]

In [12]:
def foo(var=None):
    """
    Append 1 to a list, creating a fresh list when none is supplied

    Args:
        var(list,optional): list to append to; modified in place. Defaults to a new empty list

    Returns:
        list: the list that was passed in (or the newly created one), with 1 appended
    """
    # Compare against None explicitly: 'if not var' would also discard an empty list
    # supplied by the caller, leaving the caller's object unmodified.
    if var is None:
        var = []
    var.append(1)
    return var

In [13]:
foo()

[1]

In [14]:
foo()

[1]

In [15]:
foo([])

[1]

In [16]:
foo([2])

[2, 1]

In [17]:
foo()

[1]

### Context Manager

In [1]:
import wget

In [4]:
wget.download('https://archive.org/download/datasets_getting_started_with_spark_two/students.txt','.')

'./students.txt'

In [5]:
with open('students.txt') as my_file:
    # open() sets up the context by opening the file
    text = my_file.read() # any code can run on the open file inside this block
    length = len(text)
    # leaving the indented block tears the context down by closing the file
print('The file is {} characters long'.format(length))

The file is 58 characters long


In [6]:
#with context-manager(args) as variable:
    #code inside the context
#context is removed

In [8]:
from contextlib import contextmanager

In [9]:
#create a context manager
@contextmanager
def my_context():
    """
    Minimal context manager: prints on entry, hands 42 to the 'as' target, prints on exit

    The exit message is only printed when the with-block finishes without raising.
    """
    # Setup: runs when the with statement is entered
    print('hello')
    value = 42
    yield value
    # Teardown: runs after the with-block completes
    print('goodbye')


In [10]:
with my_context() as foo:
    print('foo is {}'.format(foo))

hello
foo is 42
goodbye


In [13]:
@contextmanager
def database(url):
    """
    Context manager that yields a database connection and disconnects on exit

    Args:
        url(str): address handed to postgres.connect

    Yields:
        the connection object returned by postgres.connect(url)

    Notes:
        'postgres' is not defined in the visible part of this notebook; it must be
        in scope before this function is called.
    """
    db = postgres.connect(url)
    try:
        yield db
    finally:
        # Runs even when the with-block raises, so the connection is never left open
        db.disconnect()

In [16]:
#url = 'http://datacamp.com/data'
#with database(url) as my_db:
#    course_list = my_db.execute('select * from courses')

### Nested Contexts

In [18]:
#fails if the file doesn't fit in memory
def copy(src, dst):
    """
    Copy the contents of one file to another, reading the source in a single call

    Args:
        src(str): File name of the file to be copied
        dst(str): where to write the new file
    """
    # The source is read completely and closed before the destination is opened
    with open(src) as source_file:
        data = source_file.read()

    with open(dst, 'w') as target_file:
        target_file.write(data)

In [17]:
#better approach: copy line by line so the whole file is never held in memory
def copy(src, dst):
    """
    Copy the contents of one file to another, one line at a time

    Both files are open at the same time (nested contexts), so only a single
    line needs to be held in memory.

    Args:
        src(str): File name of the file to be copied
        dst(str): where to write the new file
    """
    with open(src) as origem:
        with open(dst, 'w') as destino:
            for line in origem:
                # Write the line just read; there is no 'contents' variable in this version
                destino.write(line)

### Handling errors

In [20]:
def get_printer(ip):
    """
    Connect to the printer at 'ip', yield once, and always disconnect afterwards

    The try/finally guarantees the disconnect and the message even when an error
    is raised at the yield point.

    NOTE(review): as written this is a plain generator function that yields no
    value; it presumably needs @contextmanager to be usable in a with statement - confirm.

    Args:
        ip: address passed to connect_to_printer
    """
    printer = connect_to_printer(ip)

    try:
        yield
    finally:
        # Cleanup runs whether or not the code at the yield point raised
        printer.disconnect()
        print('disconnect from printer')

### Function is an object

In [22]:
x = print
x('hello')

hello


In [28]:
list_func = [sum,help,print]

In [34]:
# list_func[0] is sum and list_func[2] is print, so this is print(sum([2,2]))
list_func[2](list_func[0]([2,2]))


4


In [35]:
dict_func ={'func1':sum,'func2':help,'func3':print}

In [39]:
dict_func['func3']('hello')

hello


In [40]:
def has_docstring(func):
    """
    Report whether 'func' carries a docstring

    Args:
        func (callable): A function

    Returns:
        bool: True when func.__doc__ is anything other than None
    """
    docstring = func.__doc__
    if docstring is None:
        return False
    return True

In [41]:
def no():
    # Deliberately has no docstring: it is the negative example passed to has_docstring
    return 4

In [42]:
[has_docstring(no), has_docstring(print)]

[False, True]

## Scope

In [50]:
x = 7
y = 2
print(x) #global scope
print(y)

7
2


In [51]:
def foo():
    """Print a local x and then y, which is not assigned inside this function"""
    x = 42
    print(x) #local scope: this x exists only inside foo
    print(y) #no local y, so the name is looked up outside the function
foo()

42
2


In [55]:
def bar():
    """Define y, then define and call the nested function moo, which prints its own x and bar's y"""
    y=5 #local to bar; nonlocal from moo's point of view
    def moo():
        x = 55
        print(x) #local scope
        print(y) #not local to moo, so it is found in the enclosing bar scope
    moo()

In [56]:
bar()

55
5


Closure

In [2]:
x = 25

def foo(value):
    """Return a function that prints 'value' each time it is called"""
    def bar():
        print(value)
    return bar

my_func = foo(x) #same as foo(25): 'value' is kept in the closure of the returned bar
my_func()

25


In [3]:
del(x)
my_func()

25


In [4]:
my_func.__closure__[0].cell_contents

25

@Decorators

In [6]:
#change input, output or function

def double_args(func):
    """
    Decorator that doubles both arguments before handing them to 'func'

    Args:
        func (callable): a function taking two positional arguments

    Returns:
        callable: wrapper(a, b) that returns func(a*2, b*2)
    """
    def wrapper(a,b):
        doubled_a = a*2
        doubled_b = b*2
        return func(doubled_a, doubled_b)
    return wrapper

@double_args #equivalent to writing multiply = double_args(multiply) after the def
def multiply(a,b):
    """Return the product of a and b"""
    return a * b

multiply(1,5)

20

In [14]:
import time
from functools import wraps #without it we lose metadata

def timer(func):
    """
    A decorator that prints how long a function took to run

    Args:
        func (callable): the function to time

    Returns:
        callable: a wrapper that calls 'func' with the same arguments, prints
        '<name> took <seconds>s' and returns the result of 'func'
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is a monotonic, high-resolution clock meant for measuring
        # durations; time.time() follows the wall clock, which can be adjusted mid-run
        start = time.perf_counter()
        result = func(*args, **kwargs)
        total = time.perf_counter() - start
        print('{} took {}s'.format(func.__name__, total))
        return result
    return wrapper

In [17]:
@timer
def sleep(n):
    """Sleep 3 sec"""
    # NOTE(review): the docstring says 3 seconds, but the pause is whatever n the caller passes
    time.sleep(n)

In [16]:
sleep(3)

sleep took 3.0030527114868164s


In [18]:
print(sleep.__doc__)

Sleep 3 sec


In [19]:
print(sleep.__name__)

sleep


Memoization

In [1]:
from functools import lru_cache

In [10]:
def fibonacci_of(n):
    """Return the n-th Fibonacci number using plain, uncached recursion"""
    if n not in {0, 1}:
        # Recursive case: sum of the two preceding Fibonacci numbers
        return fibonacci_of(n - 1) + fibonacci_of(n - 2)
    # Base case: the sequence starts 0, 1
    return n

In [9]:
@lru_cache
def fibonacci_lru(n):
    """Return the n-th Fibonacci number; results are memoized by lru_cache"""
    is_base_case = n in {0, 1}
    if is_base_case:
        return n
    # Each recursive call goes through the cached name, so every value is computed once
    previous = fibonacci_lru(n - 1)
    before_previous = fibonacci_lru(n - 2)
    return previous + before_previous

In [7]:
%timeit fibonacci_of(20)

1.04 ms ± 3.92 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [8]:
%timeit fibonacci_lru(20)

34.2 ns ± 0.031 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


Args in Decorator

In [20]:
from functools import wraps

def run_n_time(n):
    """
    Build a decorator that makes the decorated function run 'n' times per call

    Args:
        n (int): number of times to invoke the decorated function

    Returns:
        callable: a decorator; the wrapped function's return values are discarded
        and the wrapper itself returns None
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            # The loop counter is not needed, only the repetition
            for _ in range(n):
                func(*args, **kwargs)
        return wrapper
    return decorator

In [21]:
@run_n_time(3)
def print_sum(a,b):
    """Print the sum of a and b"""
    print(a + b)

In [22]:
print_sum(1,2)

3
3
3
