# 02_05: Advanced Containers

In [None]:
# note: this notebook requires Python 3.7 or higher;
# it will work with Python 3.6 if you run "!pip install dataclasses" in a cell

In [1]:
import math
import collections

import numpy as np
import pandas as pd
import matplotlib.pyplot as pp

%matplotlib inline   

In [2]:
# sample records stored as plain tuples: (first name, last name, birthday)
people = [("Michele", "Vallisneri", "July 15"),
          ("Albert", "Einstein", "March 14"),
          ("John", "Lennon", "October 9"),
          ("Jocelyn", "Bell Burnell", "July 15")]

In [3]:
people[0][0]
# indexing by position works; comprehensions are useful for finding all the records that match a condition.

'Michele'

In [4]:
people[0][1]

'Vallisneri'

In [5]:
[person for person in people if person[2] == "July 15"]

[('Michele', 'Vallisneri', 'July 15'), ('Jocelyn', 'Bell Burnell', 'July 15')]

In [13]:
# Build the namedtuple type "person": a specialized tuple that carries a type
# name and a label for each position, so that every field can be read by name
# as well as by index.
persontype = collections.namedtuple(
    typename='person',
    field_names=('firstname', 'lastname', 'birthday'),
)

In [7]:
michele = persontype("Michele", "Vallisneri", "July 15")

In [8]:
michele = persontype(lastname="Vallisneri", firstname="Michele", birthday="July 15")

In [9]:
michele

person(firstname='Michele', lastname='Vallisneri', birthday='July 15')

In [10]:
michele[0], michele[1], michele[2]

('Michele', 'Vallisneri', 'July 15')

In [11]:
michele.firstname, michele.lastname, michele.birthday
# named tuples print nicely, and their fields can be accessed by name.

('Michele', 'Vallisneri', 'July 15')

In [14]:
persontype(people[0])
# fails: persontype needs three arguments, but the whole tuple is passed as a single one, so two are missing.

TypeError: person.__new__() missing 2 required positional arguments: 'lastname' and 'birthday'

In [15]:
# use tuple unpacking on people[0] to build a namedtuple
persontype(*people[0])
# the star unpacks all of the elements of the tuple into separate arguments.

person(firstname='Michele', lastname='Vallisneri', birthday='July 15')

In [16]:
namedpeople = [persontype(*person) for person in people]

In [17]:
namedpeople

[person(firstname='Michele', lastname='Vallisneri', birthday='July 15'),
 person(firstname='Albert', lastname='Einstein', birthday='March 14'),
 person(firstname='John', lastname='Lennon', birthday='October 9'),
 person(firstname='Jocelyn', lastname='Bell Burnell', birthday='July 15')]

In [18]:
[person for person in namedpeople if person.birthday == "July 15"]

[person(firstname='Michele', lastname='Vallisneri', birthday='July 15'),
 person(firstname='Jocelyn', lastname='Bell Burnell', birthday='July 15')]

In [None]:
# !pip install dataclasses

In [19]:
from dataclasses import dataclass

In [20]:
# defining a data class with the same content as the "person" namedtuple
# and with a default for "birthday"

@dataclass
class personclass:
    """Record for a person: first name, last name, and an optional birthday."""
    firstname: str
    lastname: str
    birthday: str = 'unknown'  # value used when no birthday is given to the constructor

In [21]:
michele = personclass('Michele', 'Vallisneri')

In [22]:
michele

personclass(firstname='Michele', lastname='Vallisneri', birthday='unknown')

In [23]:
michele = personclass(firstname='Michele', lastname='Vallisneri')

In [24]:
michele.firstname, michele.lastname, michele.birthday

('Michele', 'Vallisneri', 'unknown')

In [25]:
michele[0]

TypeError: 'personclass' object is not subscriptable

In [26]:
print(michele)

personclass(firstname='Michele', lastname='Vallisneri', birthday='unknown')


In [30]:
# extending the data class "personclass" with a method that computes a person's full name

# @dataclass is a class decorator: from the annotated fields listed in the class
# body it generates methods such as __init__, __repr__ and __eq__
@dataclass
class personclass2:
    """Person record with a helper method that builds the full name."""
    firstname: str
    lastname: str
    birthday: str = 'unknown'

    # by convention the first parameter of every method is "self"; when the
    # method is called on an instance (here, a specific person), "self" refers
    # to that instance, so self.firstname and self.lastname are the data
    # fields stored in it
    def fullname(self):
        """Return the first and last name joined by a single space."""
        return ' '.join((self.firstname, self.lastname))

In [31]:
michele = personclass2('Michele', 'Vallisneri', 'July 15')

In [32]:
michele.fullname()

'Michele Vallisneri'

In [36]:
def mydefault():
    """Return the placeholder answer used as the default value for missing keys."""
    fallback = "I don't know"
    return fallback

In [37]:
questions = collections.defaultdict(mydefault)

In [38]:
questions['The meaning of life']
# defaultdict is most useful when building a dictionary in which each key corresponds to a list of items.

"I don't know"

In [39]:
questions

defaultdict(<function __main__.mydefault()>,
            {'The meaning of life': "I don't know"})

In [40]:
# with a plain dict, mapping each key to a list takes an explicit check
# so that the empty list is created the first time a key is seen...

birthdays = {}

for person in namedpeople:
    if person.birthday not in birthdays:
        birthdays[person.birthday] = []
    birthdays[person.birthday].append(person.firstname)

In [41]:
birthdays

{'July 15': ['Michele', 'Jocelyn'],
 'March 14': ['Albert'],
 'October 9': ['John']}

In [45]:
list()
# defaultdict takes advantage of the fact that list, called as a function with no arguments, returns an empty list.
# Therefore the loop over people can go straight to looking up the birthday key and appending to the resulting list.

[]

In [46]:
# ...but it happens elegantly with defaultdict: looking up a missing key
# calls list() to create an empty list for it, so no membership check is needed

birthdays = collections.defaultdict(list)

for person in namedpeople:
    birthdays[person.birthday].append(person.firstname)

In [47]:
birthdays

defaultdict(list,
            {'July 15': ['Michele', 'Jocelyn'],
             'March 14': ['Albert'],
             'October 9': ['John']})