# Python Quick Course

Get Python from python.org

#### Virtual Environments

In [None]:
# Virtual environments are useful when a project needs specific versions of Python or of packages/libraries
# Options: Anaconda, virtualenv, or (preferred) venv, the module built into Python

# venv steps:
# 1. Download and install Python.
#   Run the following commands in a PowerShell terminal:
# 2. Move to the directory where the virtual env will be created.
# 3. Run: & "C:\Path\to\python\python.exe" -m venv env_name
#   Activate:
# 4. Run: Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
# 5. Run: .\env_name\Scripts\Activate.ps1
#    The terminal prompt changes to show the active env

#   Optional, if Jupyter Notebooks are needed:
# 6. Run: pip install jupyter ipykernel
# 7. Run: python -m ipykernel install --user --name env_name --display-name "kernel_alias"

### Python Zen

In [1]:
import this

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!


### Blanks & Spaces Format

In [None]:
# Indentation is how Python delimits code blocks
# The "#" symbol starts a comment; comments are ignored by Python but are useful for everyone who reads the code

for i in [1, 2, 3, 4, 5]:
    print(i)                        # First line of block "for i"
    for j in [1, 2, 3, 4, 5]:
        print(j)                    # First line of block "for j"
        print(i + j)                # Last line of block "for j"
    print(i)                        # Last line of block "for i"
print("loop done")                  # Not indented: runs once, after both loops finish

1
1
2
2
3
3
4
4
5
5
6
1
2
1
3
2
4
3
5
4
6
5
7
2
3
1
4
2
5
3
6
4
7
5
8
3
4
1
5
2
6
3
7
4
8
5
9
4
5
1
6
2
7
3
8
4
9
5
10
5
loop done


In [4]:
# Whitespace (including line breaks) is ignored inside "()" and "[]", which is useful for long expressions
# NOTE: the items below are separated by commas, so this builds a tuple; the lone "+" is a
# unary plus applied to 13 and does not add anything together

long_computation = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
                    +
                    13, 14, 15, 16, 17, 18, 19, 20)

# It is also useful for making code easier to read

list_of_lists = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
easier_to_read = [[1, 2, 3],
                  [4, 5, 6],
                  [7, 8, 9]]

print(long_computation)
print(list_of_lists)
print(easier_to_read)

(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)
[[1, 2, 3], [4, 5, 6], [7, 8, 9]]
[[1, 2, 3], [4, 5, 6], [7, 8, 9]]


In [6]:
# A backslash "\" can also be used to indicate that a statement continues on the next line (rarely used)

two_plus_three = 2 + \
    3

print(two_plus_three)

5


### Modules

In [None]:
# Import modules to use external functions or features that are not loaded by default

# Import the whole module
import re
my_regex = re.compile('[0-9]+', re.I)       # This way, the prefix "re." is needed to access the module's functions
print(my_regex)

re.compile('[0-9]+', re.IGNORECASE)


In [None]:
# A module can be imported under another name (an alias), e.g. if its name has already been used for something else

import re as regex
my_regex = regex.compile('[0-9]+', regex.I)
print(my_regex)

# An alias is also handy when the name is unwieldy or will be typed many times, for example:

import matplotlib.pyplot as plt         # Conventional alias for matplotlib.pyplot
plt.plot()

re.compile('[0-9]+', re.IGNORECASE)


In [None]:
# If only specific names are needed, they can be imported explicitly

from collections import defaultdict, Counter
lookup = defaultdict(int)
my_counter = Counter()

In [10]:
# Importing the whole content of a module (with "*") can silently overwrite variables that are already defined
# Avoid "import *" in real code; it is used here only to show the problem

match = 10
from re import *        # re has a "match" function, which replaces the variable above
print(match)

<function match at 0x000002A4717E0510>


### Functions

In [None]:
# A function is a rule that takes zero or more inputs and returns an output; functions are defined with "def"

def double(x):
    """Return ``x`` multiplied by 2.

    This triple-quoted string is the docstring: it explains what the function does.
    """
    doubled = x * 2
    return doubled

double_num = double(1)
print(double_num)

2


In [None]:
# Functions are first-class: they can be assigned to variables and passed as arguments to other functions

def apply_to_x(f, x):
    """Call the function ``f`` with ``x`` as its argument and return the result."""
    outcome = f(x)
    return outcome

my_double = double      # Bind the existing function object to a second name

x = apply_to_x(my_double, 5)    # Pass the function itself as an argument
print(x)

10


In [None]:
# It's easy to create short anonymous functions, called lambdas

def apply_to_one(f):
    """Call the function ``f`` with 1 as its argument and return the result."""
    outcome = f(1)
    return outcome

y = apply_to_one(lambda n: n + 4)
print(y)

# Lambdas can be assigned to variables, but a regular "def" should be used instead:

another_double = lambda x: 2 * x    # Avoid this

def another_double(x):              # Do this instead
    """Return 2 times ``x``."""
    return 2 * x

5


In [None]:
# Function parameters can be given default arguments

def my_print(message='default message'):
    """Print ``message``; when no argument is given, print 'default message'."""
    print(message)

my_print()
my_print('hello')

default message
hello


In [None]:
# Sometimes it is useful to specify arguments by name

def full_name(first='what-his-name', last='something'):
    """Return ``first`` and ``last`` joined by a single space."""
    return ' '.join((first, last))

print(full_name('Noah', 'Gonzalez'))
print(full_name('Noah'))
print(full_name(last='Gonzalez'))

Noah Gonzalez
Noah something
what-his-name Gonzalez


### Strings

In [None]:
# Strings can be delimited by single or double quotes, as long as the opening and closing quotes match:

single_quoted_str = 'data science'
double_quoted_str = "data science"
print(single_quoted_str)
print(double_quoted_str)

data science
data science


In [32]:
# Python uses the backslash to encode special characters

tab_string = '\t'  # Represents the tab character
print(tab_string)
print(len(tab_string))

	
1


In [None]:
# If backslashes are wanted as literal characters, "raw strings" can be created with the r prefix

not_tab_string = r'\t'      # Represents the two characters "\" and "t"
print(not_tab_string)
print(len(not_tab_string))

\t
2


In [34]:
# Multiline strings can be created with triple quotes (the line breaks become part of the string)
multiline_str = '''First line,
second line,
third line
'''

print(multiline_str)

First line,
second line,
third line



In [35]:
# An f-string substitutes the values of expressions into a string:

first_name = 'Noah'
last_name = 'Gonzalez'

# Named "full_name1" rather than "full_name" so that this string does not replace
# the full_name() function defined earlier in the notebook
full_name1 = f'{first_name} {last_name}'

# Other ways are less convenient

full_name2 = first_name + ' ' + last_name               # String concatenation
full_name3 = '{0} {1}'.format(first_name, last_name)    # str.format


print(full_name1)
print(full_name2)
print(full_name3)

Noah Gonzalez
Noah Gonzalez
Noah Gonzalez


### Exceptions

In [None]:
# When something goes wrong, Python raises an exception. Unhandled exceptions crash the program; they can be handled using "try" and "except"
# Exceptions can make the code cleaner and more readable

try:
    print(0/0)
except ZeroDivisionError:
    print('cannot divide by zero')

cannot divide by zero


### Lists

In [None]:
# One of Python's most essential data structures is the list
# A list is an ordered collection (like an array in other languages, but with added functionality)

int_list = [1, 2, 3]
heterogeneous_list = ['string', 0.1, True]
list_of_lists = [int_list, heterogeneous_list, []]
list_lenght = len(int_list)     # Number of items in the list
list_sum = sum(int_list)        # Sum of the items in the list

print(int_list)
print(heterogeneous_list)
print(list_of_lists)
print(list_lenght)
print(list_sum)

[1, 2, 3]
['string', 0.1, True]
[[1, 2, 3], ['string', 0.1, True], []]
3
6


In [None]:
# The n-th item of a list can be read or set using brackets "[]" and its index (indexes start at 0)

x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
zero = x[0]

print(x)
print(zero)

one = x[1]
nine = x[-1]        # Pythonic for last item
eight = x[-2]       # Pythonic for next-to-last item
x[0] = -1           # Set the first item (index 0) to -1

print(one)
print(nine)
print(eight)
print(x)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
0
1
9
8
[-1, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [None]:
# Brackets can also be used to take slices of a list
# Slice i:j means all items from i (included) to j (excluded)
# (x is the list from the previous cell, whose first item was set to -1)


first_three = x[:3]
three_to_end = x[3:]
one_to_four = x[1:5]
last_three = x[-3:]
without_first_and_last = x[1:-1]
copy_of_x = x[:]                    # Omitting both bounds takes every item


print(first_three)
print(three_to_end)
print(one_to_four)
print(last_three)
print(without_first_and_last)
print(copy_of_x)

[-1, 1, 2]
[3, 4, 5, 6, 7, 8, 9]
[1, 2, 3, 4]
[7, 8, 9]
[1, 2, 3, 4, 5, 6, 7, 8]
[-1, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [44]:
# Slices accept a third argument, the stride, which can be negative

every_third = x[::3]
five_to_three = x[5:2:-1]       # Walks backwards from index 5 down to (but excluding) index 2

print(every_third)
print(five_to_three)

[-1, 3, 6, 9]
[5, 4, 3]


In [None]:
# The "in" operator checks whether an item is in a list
# This check examines the list items one by one, so it should only be used when the list is short or the time taken does not matter

print(1 in [1, 2, 3])
print(0 in [1, 2, 3])

True
False


In [47]:
# To modify a list in place, "extend" can be used to add the items of another collection

x = [1, 2, 3]
x.extend([4, 5, 6])     # x itself is changed

print(x)

[1, 2, 3, 4, 5, 6]


In [48]:
# If the list should not be modified, concatenation with "+" builds a new list in another variable

x = [1, 2, 3]
y = x + [4, 5, 6]       # x is left unchanged

print(x)
print(y)

[1, 2, 3]
[1, 2, 3, 4, 5, 6]


In [49]:
# More often, items are added to a list one at a time with "append"

x = [1, 2, 3]
x.append(0)

print(x)

[1, 2, 3, 0]


In [50]:
# It is often convenient to unpack lists (when the number of items is known)

x, y = [1, 2]       # Raises ValueError if both sides do not have the same number of items

print(x)
print(y)

1
2


### Tuples

In [None]:
# Tuples are like immutable lists. They are written using parentheses "()" or with no delimiters at all

a_list = [1, 2]
a_tuple = (1, 2)
other_tuple = 3, 4
a_list[1] = 3           # Lists can be modified

print(a_list)

try:
    a_tuple[1] = 3      # Tuples cannot: this raises TypeError
except TypeError:
    print('cannot modify a tuple')

[1, 3]
cannot modify a tuple


In [52]:
# Tuples are a convenient way to return several values from a function

def sum_and_product(x, y):
    """Return the pair ``(x + y, x * y)`` as a tuple."""
    total = x + y
    product = x * y
    return total, product

sp = sum_and_product(2, 3)          # The whole tuple
s, p = sum_and_product(5, 10)       # Unpacked into two variables

print(sp)
print(s, p)

(5, 6)
15 50


In [53]:
# Tuples (and lists) can be used for multiple assignment

x, y = 1, 2

print(x, y)

x, y = y, x     # Pythonic way to swap variables

print(x, y)

1 2
2 1


### Dictionaries

In [5]:
# A fundamental data structure: it associates values with keys and allows the value of a given key to be retrieved quickly

empty_dict = {}                     # Pythonic way
empty_dict2 = dict()                # Another way
grades = {'Noah': 88, 'Tim': 95}    # Dict literal

print(empty_dict)
print(empty_dict2)
print(grades)

{}
{}
{'Noah': 88, 'Tim': 95}


In [7]:
# The value for a key can be retrieved using brackets "[]"

noahs_grade = grades['Noah']

print(noahs_grade)

# But a KeyError is raised when a key that does not exist is requested

try:
    melisas_grade = grades['Melisa']
except KeyError:
    print('no grade for Melisa')

88
no grade for Melisa


In [None]:
# The existence of a key can be checked using "in"
# This check is fast even for large dictionaries

noah_has_grade = 'Noah' in grades
melisa_has_grade = 'Melisa' in grades

print(noah_has_grade)
print(melisa_has_grade)

True
False


In [12]:
# Dictionaries have a "get" method that returns a default value (instead of raising an exception) when the requested key is not in the dictionary

noahs_grade = grades.get('Noah', 0)
melisas_grade = grades.get('Melisa', 0)
no_ones_grade = grades.get('No One')        # The default value is None

print(noahs_grade)
print(melisas_grade)
print(no_ones_grade)

88
0
None


In [14]:
# Key/value pairs can be assigned using brackets "[]"

grades['Tim'] = 99              # Replaces the previous value
grades['Melisa'] = 100          # Adds a third entry

print(grades)

{'Noah': 88, 'Tim': 99, 'Melisa': 100}


In [15]:
# Dictionaries can be used to represent structured data:

tweet = {
    'user': 'Noah',
    'text': 'Data Science is awesome',
    'retweet_count': 100,
    'hashtags': ['#data', '#science', '#datascience', '#awesome']
}

print(tweet)

{'user': 'Noah', 'text': 'Data Science is awesome', 'retweet_count': 100, 'hashtags': ['#data', '#science', '#datascience', '#awesome']}


In [16]:
# All the keys, values or (key, value) pairs of a dictionary can also be retrieved

tweet_keys = tweet.keys()
tweet_values = tweet.values()
tweet_items = tweet.items()
search_key = 'user' in tweet_keys       # Membership test on the keys view
search_key2 = 'user' in tweet           # Same check, directly on the dictionary
search_key3 = 'Noah' in tweet_values    # Membership test on the values view

print(tweet_keys)
print(tweet_values)
print(tweet_items)
print(search_key)
print(search_key2)
print(search_key3)

dict_keys(['user', 'text', 'retweet_count', 'hashtags'])
dict_values(['Noah', 'Data Science is awesome', 100, ['#data', '#science', '#datascience', '#awesome']])
dict_items([('user', 'Noah'), ('text', 'Data Science is awesome'), ('retweet_count', 100), ('hashtags', ['#data', '#science', '#datascience', '#awesome'])])
True
True
True


In [None]:
# Dictionary keys must be "hashable"; in particular, lists cannot be used as keys

#### defaultdict

In [17]:
# Imagine we need to count the words in a document (represented here as a list of words)

document = [
    'positive', 'gem', 'infection', 'twilight', 'borrow', 
    'count', 'leader', 'divide', 'positive', 'bond', 
    'deficiency', 'beach', 'infection', 'faith', 'beach', 
    'study', 'divide', 'deficiency', 'divide', 'positive', 
    'gem', 'infection', 'art', 'infection', 'positive', 
    'leader', 'divide', 'leader', 'count', 'infection', 
    'beach', 'count', 'beach', 'study', 'positive', 
    'borrow', 'faith', 'deficiency', 'bond', 'infection', 
    'gem', 'gem', 'count', 'deficiency', 'divide', 
    'deficiency', 'deficiency', 'study', 'coin', 'positive', 
    'gem', 'divide', 'study', 'divide', 'count', 
    'study', 'divide', 'divide', 'count', 'deficiency', 
    'faith', 'twilight', 'deficiency', 'faith', 'borrow', 
    'count', 'deficiency', 'positive', 'count'
    ]

In [18]:
# An obvious approach is to create a dictionary whose keys are words and whose values are counts
# For each word, increment its count if it is already in the dictionary, or add it with a count of 1 if it is not

word_counts = {}

for word in document:
    if word in word_counts:
        word_counts[word] += 1
    else:
        word_counts[word] = 1

print(word_counts)

{'positive': 7, 'gem': 5, 'infection': 6, 'twilight': 2, 'borrow': 3, 'count': 8, 'leader': 3, 'divide': 9, 'bond': 2, 'deficiency': 9, 'beach': 4, 'faith': 4, 'study': 5, 'art': 1, 'coin': 1}


In [19]:
# Alternatively, the exception raised when looking up a missing key can be handled

word_counts = {}

for word in document:
    try:
        word_counts[word] += 1
    except KeyError:
        word_counts[word] = 1       # First time this word is seen

print(word_counts)

{'positive': 7, 'gem': 5, 'infection': 6, 'twilight': 2, 'borrow': 3, 'count': 8, 'leader': 3, 'divide': 9, 'bond': 2, 'deficiency': 9, 'beach': 4, 'faith': 4, 'study': 5, 'art': 1, 'coin': 1}


In [20]:
# A third way is to use "get", which handles missing keys gracefully

word_counts = {}

for word in document:
    previous_count = word_counts.get(word, 0)       # 0 when the word has not been seen yet
    word_counts[word] = previous_count + 1

word_counts

{'positive': 7,
 'gem': 5,
 'infection': 6,
 'twilight': 2,
 'borrow': 3,
 'count': 8,
 'leader': 3,
 'divide': 9,
 'bond': 2,
 'deficiency': 9,
 'beach': 4,
 'faith': 4,
 'study': 5,
 'art': 1,
 'coin': 1}

In [21]:
# The approaches above are cumbersome, which is why "defaultdict" is useful
# A "defaultdict" is like a regular dictionary, except that when a missing key is looked up, it first adds a value for that key using the zero-argument function it was created with
# It lives in the "collections" module

from collections import defaultdict

word_counts = defaultdict(int)          # int() produces 0
for word in document:
    word_counts[word] += 1

print(word_counts)

defaultdict(<class 'int'>, {'positive': 7, 'gem': 5, 'infection': 6, 'twilight': 2, 'borrow': 3, 'count': 8, 'leader': 3, 'divide': 9, 'bond': 2, 'deficiency': 9, 'beach': 4, 'faith': 4, 'study': 5, 'art': 1, 'coin': 1})


In [22]:
# It is also useful with "list", "dict" or our own functions

dd_list = defaultdict(list)                     # list() produces an empty list
dd_list[2].append(1)

dd_dict = defaultdict(dict)                     # dict() produces an empty dict
dd_dict['Noah']['City'] = 'Mexico City'

dd_pair = defaultdict(lambda: [0, 0])           # Our own function: produces [0, 0]
dd_pair[2][1] = 1

print(dd_list)
print(dd_dict)
print(dd_pair)

defaultdict(<class 'list'>, {2: [1]})
defaultdict(<class 'dict'>, {'Noah': {'City': 'Mexico City'}})
defaultdict(<function <lambda> at 0x0000027AF35712F0>, {2: [0, 1]})


### Counters

In [23]:
# A "Counter" turns a sequence of values into a defaultdict(int)-like object that maps keys to counts

from collections import Counter

c = Counter([0, 1, 2, 0])

print(c)

Counter({0: 2, 1: 1, 2: 1})


In [26]:
# This gives a very simple way to solve the word-counting problem

word_counts = Counter(document)

print(word_counts)

Counter({'divide': 9, 'deficiency': 9, 'count': 8, 'positive': 7, 'infection': 6, 'gem': 5, 'study': 5, 'beach': 4, 'faith': 4, 'borrow': 3, 'leader': 3, 'twilight': 2, 'bond': 2, 'art': 1, 'coin': 1})


In [27]:
# A "Counter" instance has a "most_common" method that is frequently used

# Print the 10 most common words together with their counts
for word, count in word_counts.most_common(10):
    print(word, count)

divide 9
deficiency 9
count 8
positive 7
infection 6
gem 5
study 5
beach 4
faith 4
borrow 3


### Sets

In [29]:
# Another useful data structure is the "set", which represents a collection of distinct items
# A set can be defined by listing its items between braces "{}"

primes_below_10 = {2, 3, 5, 7}

print(primes_below_10)

{2, 3, 5, 7}


In [31]:
# But this doesn't work for empty sets, because "{}" means an empty dict
# An empty set has to be created with "set()"

s = set()
s.add(1)
s.add(2)
s.add(2)                # No effect: 2 is already in the set

print(s)

{1, 2}


In [None]:
# The "in" operation is faster with sets than with lists

stopwords_list = ['a', 'an', 'at', 'yet', 'you', 'hundred of other words']
print('zip' in stopwords_list)                     # Each item needs to be checked

stopwords_set = set(stopwords_list)                # Faster
print('zip' in stopwords_set)

False
False


In [1]:
# Sets can be used to find the distinct items of a collection

item_list = [1, 2, 3, 1, 2, 3]
num_items = len(item_list)
item_set = set(item_list)                   # Duplicates are dropped
num_distinct_items = len(item_set)
distinct_item_list = list(item_set)         # Back to a list, now without duplicates

print(item_list)
print(num_items)
print(item_set)
print(num_distinct_items)
print(distinct_item_list)

[1, 2, 3, 1, 2, 3]
6
{1, 2, 3}
3
[1, 2, 3]
