## Printing integers with spaces

In [7]:
for each in [1,2,3,4,5,6,7,8,9]: 
    print(each, end=' ') 
 
list1 = [1,2,3,4,5,6,7,8,9] 
# print(*list1) 

1 2 3 4 5 6 7 8 9 

In [8]:
'   spacious   '.strip()
# 'spacious'
"AABAA".strip("A")
# 'B'
"ABBA".strip("AB")
# ''
"ABCABBA".strip("AB")
# 'C'
'www.example.com'.strip('cmowz.') # this example extracts web address
# 'example'

'example'

# How to make python run faster

In [9]:
# Use built-in functions

wordlist = ['hi','bye']
newlist = []

for word in wordlist:
    newlist.append(word.upper())
    

newlist = map(str.upper, wordlist)

In [10]:
# Use List comprehension instead of loops

newlist = []
for i in range(1, 100):
    if i % 2 == 0:
        newlist.append(i**2)

# Faster
newlist = [i**2 for i in range(1, 100) if i%2==0]

In [11]:
# Only import from modules and libraries what you need.
import math
from math import sqrt
import time

start_time = time.perf_counter()
value = math.sqrt(50)
end_time = time.perf_counter()
all_time = end_time - start_time

print("Import all: " + str(end_time - start_time))

#Faster

start_time = time.perf_counter()
value = sqrt(50)
end_time = time.perf_counter()
some_time = end_time - start_time
print("Import some: " + str(end_time - start_time))
print("% Diff: " + str((all_time - some_time) / all_time * 100))

Import all: 4.1799999998204385e-05
Import some: 5.390000000105033e-05
% Diff: -28.947368429104614


In [12]:
# Use Join instead of + for string concatenation

output = "Programming" + "is" + "fun"

output = " ".join(["Programming" , "is", "fun"])

# Python Timer

In [13]:
import time

start_time = time.perf_counter()
end_time = time.perf_counter()
time_taken = end_time - start_time
print(time_taken)

4.299999999801685e-05


In [14]:
#Timer Class

import time

class TimerError(Exception): #Inherits from the Exception Class to create a custom error
    """A custom exception used to report errors in use of Timer class"""

class Timer:
    def __init__(self, text="Elapsed time: {:0.4f} seconds"): 
        #text is given as a string as f-strings evaluate immediately, and when you instantiate Timer, your code hasn’t yet calculated the elapsed time.
        #text output can be edited now with t = Timer(text="You waited {:.1f} seconds")
        
        #Instance variables
        self._start_time = None
        self.text = text

    def start(self):
        """Start a new timer"""
        if self._start_time is not None:
            raise TimerError(f"Timer is running. Use .stop() to stop it")

        self._start_time = time.perf_counter() 
        #Note: The underscore (_) prefix of ._start_time is a Python convention. 
        #It signals that ._start_time is an internal attribute that users of the Timer class shouldn’t manipulate.

    def stop(self):
        """Stop the timer, and report the elapsed time"""
        if self._start_time is None:
            raise TimerError(f"Timer is not running. Use .start() to start it")

        elapsed_time = time.perf_counter() - self._start_time
        self._start_time = None
        print(self.text.format(elapsed_time))

t = Timer()
t.start()
time.sleep(2)
t.stop()

Elapsed time: 2.0036 seconds


# Inserting numbers into strings

In [15]:
print(f"Downloaded the tutorial in {start_time - end_time:0.4f} seconds")

Downloaded the tutorial in -0.0000 seconds


# Object Oriented Programming

In [16]:
class Dog:
    species = "Canis familiaris"

    def __init__(self, name, age):
        self.name = name
        self.age = age

#     # Instance method
#     def description(self):
#         return f"{self.name} is {self.age} years old"

    # Another instance method
    def speak(self, sound):
        return f"{self.name} says {sound}"
    
    def __str__(self): #Creates description for class to call using ?
        return f"{self.name} is {self.age} years old"
    
miles = Dog("Miles", 4)
print(miles)

Miles is 4 years old


# Dictionaries

In [17]:
#Defining Dicts
squares = {x: x * x for x in range(6)}
phonebook = {
"bob": 7387,
"alice": 3719,
"jack": 7052,
}

# defaultdict
The defaultdict class is another dictionary subclass that accepts a callable in its constructor whose return value will be used if a requested key cannot be found.

This can save you some typing and make your intentions clearer as compared to using get() or catching a KeyError exception in regular dictionaries

In [18]:
from collections import defaultdict
dd = defaultdict(list)

# Accessing a missing key creates it and
# initializes it using the default factory,
# i.e. list() in this example:
dd["dogs"].append("Rufus")
dd["dogs"].append("Kathrin")
dd["dogs"].append("Mr Sniffles")

dd["dogs"]

['Rufus', 'Kathrin', 'Mr Sniffles']

# ChainMap
The collections.ChainMap data structure groups multiple dictionaries into a single mapping. Lookups search the underlying mappings one by one until a key is found. Insertions, updates, and deletions only affect the first mapping added to the chain

In [19]:
from collections import ChainMap
dict1 = {"one": 1, "two": 2}
dict2 = {"three": 3, "four": 4}
chain = ChainMap(dict1, dict2)

print(chain)
# ChainMap searches each collection in the chain
# from left to right until it finds the key (or fails):
print(chain["three"])
print(chain["one"])
# print(chain["missing"])

ChainMap({'one': 1, 'two': 2}, {'three': 3, 'four': 4})
3
1


# MappingProxyType
Immutable Dictionary

Can be helpful if, for example, you’d like to return a dictionary carrying internal state from a class or module while discouraging write access to this object. 

Using MappingProxyType allows you to put these restrictions in place without first having to create a full copy of the dictionary

In [20]:
from types import MappingProxyType
writable = {"one": 1, "two": 2}
read_only = MappingProxyType(writable)

# The proxy is read-only:
print(read_only["one"])

1


In [21]:
# read_only["one"] = 23

In [22]:
# Updates to the original are reflected in the proxy:
writable["one"] = 42
read_only

mappingproxy({'one': 42, 'two': 2})

# Data Structures

## Tuple

In [6]:
tup = 4, 5, 6
nested_tup = (4, 5, 6), (7, 8)
tup = tuple('string')
tup = tuple(['foo', [1, 2], True]) #mutable obj stored in tuples become immutable
tup[1].append(3) # However things within mutable elements are still mutable. []
(4, None, 'foo') + (6, 0) + ('bar',) #concat with +
('foo', 'bar') * 4
a, b, c = tup #multi assignment

a, b = 1, 2
b, a = a, b #Swapping

seq = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
for a, b, c in seq:
    print('a={0}, b={1}, c={2}'.format(a, b, c))

values = 1, 2, 3, 4, 5
a, b, *rest = values
print(*rest)

a, b, *_ = values # *_ for unwanted values

a = (1, 2, 2, 2, 3, 4, 2)
a.count(2)

a=1, b=2, c=3
a=4, b=5, c=6
a=7, b=8, c=9
3 4 5


4

## List

In [19]:
b_list = list(('foo', 'bar', 'baz')) #tuple to list
print(list(range(10))) #Range

b_list.append('dwarf')
b_list.insert(1, 'red') #pos & element, insert more expensive than append as more shifting required.

b_list.pop(2)
b_list.remove('dwarf') #removes the first value

print([4, None, 'foo'] + [7, 8, (2, 3)]) #concat

x = [4, None, 'foo']
x.extend([7, 8, (2, 3)]) #adding multiple types of elements to existing list, expensive
print(x) 

a = [7, 2, 5, 1, 3]
print(a.sort())

b = ['saw', 'small', 'He', 'foxes', 'six']
print(b.sort(key=len))

import bisect #implements binary search and insertion into a sorted list.
c = [1, 2, 2, 2, 3, 4, 7]
print(bisect.bisect(c, 2)) #finds the location where an element should be inserted to keep it sorted
print(bisect.insort(c, 6)) #finds location and inserts

# for i,value in enumerate(c):
#     print(i)

some_list = ['foo', 'bar', 'baz'] #using Enumerate to create mappings
mapping = {}
for i, v in enumerate(some_list):
    mapping[v] = i

print(sorted([7, 1, 2, 6, 0, 3, 2]))
print(sorted('horse race'))

seq1 = ['foo', 'bar', 'baz']
seq2 = ['one', 'two', 'three']
zipped = zip(seq1, seq2) #number of elements taken in depends on shortest
print(list(zipped))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[4, None, 'foo', 7, 8, (2, 3)]
[4, None, 'foo', 7, 8, (2, 3)]
None
None
4
None
[0, 1, 2, 2, 3, 6, 7]
[' ', 'a', 'c', 'e', 'e', 'h', 'o', 'r', 'r', 's']
[('foo', 'one'), ('bar', 'two'), ('baz', 'three')]


In [21]:
for i, (a, b) in enumerate(zip(seq1, seq2)):
    print('{0}: {1}, {2}'.format(i, a, b))

print(list(reversed(range(10))))

pitchers = [('Nolan', 'Ryan'), ('Roger', 'Clemens'),('Schilling', 'Curt')] #convert list of rows into columns
first_names, last_names = zip(*pitchers)
print(first_names)
print(last_names)

0: foo, one
1: bar, two
2: baz, three
[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]
('Nolan', 'Roger', 'Schilling')
('Ryan', 'Clemens', 'Curt')


## Dictionaries

In [49]:
d1 = {'a' : 'some value', 'b' : [1, 2, 3, 4]}
print('b' in d1) #Check if key present

d1['dummy'] = 'another value'

d1[5]='some value'
print(d1)
del d1[5]
print(d1)

ret = d1.pop('dummy')
print(ret)
print(d1)

print(list(d1.keys()))
print(list(d1.values()))

d1.update({'b' : 'foo', 'c' : 12}) #combining dicts
print(d1)


mapping = dict(zip(range(5), reversed(range(5)))) #Creating Dicts from sequences
print(mapping)

#Get key or return default values
# value = some_dict.get(key, default_value) 

words = ['apple', 'bat', 'bar', 'atom', 'book']
by_letter = {}
for word in words:
    letter = word[0]
    by_letter.setdefault(letter, []).append(word)
print(by_letter)

print(by_letter.get('a','default value if key missing'))
print(by_letter.get('c','default value if key missing'))

#Keys of dict need to be hashable(immutable), check using hash function 
print(hash('string'))

#print(hash([1,2,3])) #this will fail as lists are mutable
#convert them to tuples
d = {}
d[tuple([1, 2, 3])] = 5
print(d)

True
{'a': 'some value', 'b': [1, 2, 3, 4], 'dummy': 'another value', 5: 'some value'}
{'a': 'some value', 'b': [1, 2, 3, 4], 'dummy': 'another value'}
another value
{'a': 'some value', 'b': [1, 2, 3, 4]}
['a', 'b']
['some value', [1, 2, 3, 4]]
{'a': 'some value', 'b': 'foo', 'c': 12}
{0: 4, 1: 3, 2: 2, 3: 1, 4: 0}
{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}
['apple', 'atom']
default value if key missing
-7527390285843405497
{(1, 2, 3): 5}


## Set

Dict but with only keys

See table 3-1 for commonly used set methods

In [54]:
set([2, 2, 2, 1, 3, 3])

a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7, 8}

print(a.union(b))
print(a|b) #union, common elements between the 2

#interesect
print(a.intersection(b))
print(a & b)

# a.add(x) N/A Add element x to the set a
# a.clear() N/A Reset the set a to an empty state, discarding all of
# its elements
# a.remove(x) N/A Remove element x from the set a
# a.pop() N/A Remove an arbitrary element from the set a, raising
# KeyError if the set is empty
# a.union(b) a | b All of the unique elements in a and b
# a.update(b) a |= b Set the contents of a to be the union of the
# elements in a and b
# a.intersection(b) a & b All of the elements in both a and b
# a.intersection_update(b) a &= b Set the contents of a to be the intersection of the
# elements in a and b
# a.difference(b) a - b The elements in a that are not in b
# a.difference_update(b) a -= b Set a to the elements in a that are not in b
# a.symmetric_difference(b) a ^ b All of the elements in either a or b but not both
# a.symmetric_difference_update(b) a ^= b Set a to contain the elements in either a or b but
# not both
# a.issubset(b) N/A True if the elements of a are all contained in b
# a.issuperset(b) N/A True if the elements of b are all contained in a
# a.isdisjoint(b) N/A True if a and b have no elements in common


{1, 2, 3, 4, 5, 6, 7, 8}
{1, 2, 3, 4, 5, 6, 7, 8}
{3, 4, 5}
{3, 4, 5}


## List,Set,Dict Comprehension
They allow you to concisely form a new list by filtering the elements of a collection, transforming the elements passing the filter in one concise expression.

In [57]:
strings = ['a', 'as', 'bat', 'car', 'dove', 'python']
[x.upper() for x in strings if len(x) > 2]

In [60]:
#dict_comp = {key-expr : value-expr for value in collection if condition}
loc_mapping = {val : index for index, val in enumerate(strings)}
loc_mapping

{'a': 0, 'as': 1, 'bat': 2, 'car': 3, 'dove': 4, 'python': 5}

In [63]:
#set_comp = {expr for value in collection if condition}
unique_lengths = {len(x) for x in strings}
unique_lengths

{1, 2, 3, 4, 6}

In [64]:
set(map(len, strings))

{1, 2, 3, 4, 6}

In [66]:
#Nested Lists
all_data = [['John', 'Emily', 'Michael', 'Mary', 'Steven'],
            ['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar']]
result = [name for names in all_data for name in names 
          if name.count('e') >= 2]
result

['Steven']

In [67]:
some_tuples = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
flattened = [x for tup in some_tuples for x in tup]
flattened

[1, 2, 3, 4, 5, 6, 7, 8, 9]

## Functions

In [74]:
#Cleaning dirty data
import re
def clean_strings(strings):
    result = []
    for value in strings:
        value = value.strip() #remove front and back spaces or inside ()
        value = re.sub('[!#?]', '', value) # replaces !?#
        value = value.title() #First letter is capital
        result.append(value)
    return result

states = [' Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda',
          'south carolina##', 'West virginia?']
clean_strings(states)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South Carolina',
 'West Virginia']

In [76]:
#Making a list of operations to apply in order, easier to see how the strings are manipulated
def remove_punctuation(value):
    return re.sub('[!#?]', '', value)

clean_ops = [str.strip, remove_punctuation, str.title]

def clean_strings(strings, ops):
    result = []
    for value in strings:
        for function in ops:
            value = function(value)
        result.append(value)
    return result

clean_strings(states, clean_ops)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South Carolina',
 'West Virginia']

In [78]:
for x in map(remove_punctuation, states):
    print(x)

 Alabama 
Georgia
Georgia
georgia
FlOrIda
south carolina
West virginia


# Lambda Functions

In [83]:
strings = ['foo', 'card', 'bar', 'aaaa', 'abab']
strings.sort(key=lambda x: len(set(list(x)))) #list splits, set finds distinct
strings

['aaaa', 'foo', 'abab', 'bar', 'card']

## Currying
deriving functions from other functions

In [86]:
def add_numbers(x, y):
    return x + y
add_five = lambda y: add_numbers(5,y) #2nd var y is said to be curried.


from functools import partial
add_five = partial(add_numbers, 5)

## Generators
A generator is a concise way to construct a new iterable object.

When would you need a generator?

In [89]:
def squares(n=10):
    print('Generating squares from 1 to {0}'.format(n ** 2))
    for i in range(1, n + 1):
        yield i ** 2
        
#Only executes when you request elements from the generator
gen = squares()
gen


<generator object squares at 0x000001896C065A98>

In [94]:
for x in gen:
    print(x, end=' ')

## Itertools module
The standard library itertools module has a collection of generators for many common
data algorithms. For example, groupby takes any sequence and a function,
grouping consecutive elements in the sequence by return value of the function. Here’s
an example

Check out more iterative tools

In [112]:
import itertools

first_letter = lambda x: x[0]
names = ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']

#Takes in a seq & func to return a tuple, grouping using the result of the function
for letter, names in itertools.groupby(names, first_letter): 
    print(letter, list(names)) # names is a generator

A ['Alan', 'Adam']
W ['Wes', 'Will']
A ['Albert']
S ['Steven']


In [113]:
# combinations(iterable, k) Generates a sequence of all possible k-tuples of elements in the iterable,
# ignoring order and without replacement (see also the companion function
# combinations_with_replacement)
# permutations(iterable, k) Generates a sequence of all possible k-tuples of elements in the iterable,
# respecting order
# groupby(iterable[, keyfunc]) Generates (key, sub-iterator) for each unique key
# product(*iterables, repeat=1) Generates the Cartesian product of the input iterables as tuples, similar to a
# nested for loop

## Error Handling

Keep track of the different kinds of erros you come across

In [115]:
def attempt_float(x):
    try:
        return float(x)
    except (TypeError, ValueError):
        return x

In [119]:
# #Dont supress exception but still want some code to be executed.

# f = open(path, 'w')
# try:
#     write_to_file(f)
# except:
#     print('Failed')
# else:
#     print('Succeeded')
# finally:
#     f.close() # f will ALWAYS close
    


## Files & Operating Systems

In [121]:
# path = 'examples/segismundo.txt'
# f = open(path)

# for line in f: # Iterable
#     pass

# lines = [x.rstrip() for x in open(path)] #There will be EOL markers 

# # ['Sueña el rico en su riqueza,',
# # 'que más cuidados le ofrece;',]

# f.close() #Important to close file to release resources


In [122]:
# #Use this instead as it auto closes the file
# with open(path) as f:
#     lines = [x.rstrip() for x in f]
    


# Chapter 5: Getting Started with Pandas

## Intro to pandas Data Structures

pandas good for tabular or heterogeneous data.
NumPy best homogeneous numerical array data.

## Series

In [129]:
import pandas as pd
obj = pd.Series([1,2,3,4],index = ['a','b','c','d'])
obj['a'] #Can use index as locator

1

In [130]:
#checking index
'b' in obj

True

In [136]:
#Dict to Series
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj3 = pd.Series(sdata)
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [140]:
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = pd.Series(sdata, index=states)
#Detecting null objects
pd.isnull(obj4)
pd.notnull(obj4)
obj4.isnull() #instance method

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [142]:
obj3 + obj4 #Join

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

In [143]:
obj4.name = 'population'
obj4.index.name = 'state'
obj4

state
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
Name: population, dtype: float64

## Dataframes

In [None]:
#del to remove col
