# Storing Data Using Other Collection Types

## Storing using set

In [1]:
vowels = {'a', 'e', 'i', 'o', 'u'}
vowels

{'a', 'e', 'i', 'o', 'u'}

In [2]:
vowels = {'a', 'e', 'a', 'a', 'i', 'o', 'u', 'u'}
vowels

{'a', 'e', 'i', 'o', 'u'}

In [3]:
{'a', 'e', 'i', 'o', 'u'} ==vowels

True

In [4]:
type(vowels)

set

In [5]:
type({1, 2, 3, 4})

set

In [6]:
set() # empty set
type(set())

set

In [7]:
set([2, 2, 3, 5, 6, 5])

{2, 3, 5, 6}

In [8]:
set(range(5))

{0, 1, 2, 3, 4}

In [9]:
vowels

{'a', 'e', 'i', 'o', 'u'}

In [10]:
vowels.add('y')
vowels

{'a', 'e', 'i', 'o', 'u', 'y'}

In [11]:
ten = set(range(10))
low = {0, 1, 2, 3, 4}
odd = {1, 4, 5, 7, 9}

In [12]:
low.add(9)
low

{0, 1, 2, 3, 4, 9}

In [13]:
ten

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}

In [14]:
low.difference(odd)

{0, 2, 3}

In [15]:
low.intersection(odd)

{1, 4, 9}

In [16]:
low.issubset(ten)

True

In [17]:
ten.issuperset(low)

True

In [18]:
low.remove(0)
low

{1, 2, 3, 4, 9}

In [19]:
low.symmetric_difference(odd)

{2, 3, 5, 7}

In [20]:
low.union(odd)

{1, 2, 3, 4, 5, 7, 9}

In [21]:
low.clear()

In [22]:
from typing import Set, TextIO
from io import StringIO

def observation_birds(file_name:TextIO)-> Set[str]:
    """ Read a file and return it's content as a set
   
    >>> infile = StringIO('bird 1\\nbird 2\\nbird 1\\n ')
    >>> birds = observation_birds(infile)
    >>> 'bird 1' in birds
    True
    >>> 'bird 2' in birds
    True
    >>> 'bird 4' in birds
    False
    >>> len(birds) != 2
    True
    """
    birds = set()
    for line in file_name:
        bird = line.strip()
        birds.add(bird)
    return birds
    
if __name__ == '__main__':
    from doctest import testmod
    testmod()
    with open('files/observations.txt', 'r') as file:
        print(observation_birds(file))

{'long-tailed jaeger', 'northern fulmar', 'snow goose', 'canada goose'}


## Storing using tuples

In [23]:
rock = 'anthracite'
rock[9]

'e'

In [24]:
rock[0:3]

'ant'

In [25]:
rock[:-5]

'anthr'

In [26]:
rock[-5:]

'acite'

In [27]:
for character in rock:
    print(character)

a
n
t
h
r
a
c
i
t
e


In [28]:
bases = ('A', 'C', 'G', 'T')
for base in bases:
    print(base)

A
C
G
T


In [29]:
life = (['Canada', 76.5], ['United States', 75.5], ['Mexico', 72.0])
life

(['Canada', 76.5], ['United States', 75.5], ['Mexico', 72.0])

## Storing using dictionary

In [30]:
# bird observation using a list
from typing import List, Any, TextIO, Dict
from io import StringIO
def count_bird(observation_file:TextIO)->List[List[Any]]:
    """Return a set of the bird species listed in observations_file, which has one bird species per line.
    
    >>> infile = StringIO('bird 1\\nbird 2\\nbird 1\\n')
    >>> count_bird(infile)
    [['bird 1', 2], ['bird 2', 1]]
    """
    bird_counts = []
    for line in observation_file:
        bird = line.strip()
        found = False
        for entry in bird_counts:
            if entry[0] == bird:
                entry[1] += 1
                found = True
        if not found :
            bird_counts.append([bird, 1])
    return bird_counts

if __name__ == '__main__':
    with open('files/observations.txt', 'r') as file:
        for bird in count_bird(file):
            print(bird[0], bird[1])
        testmod()

canada goose 5
long-tailed jaeger 2
snow goose 1
northern fulmar 1


In [31]:
bird_to_observations = {'canada goose' : 3,  'northern fulmar':1}
bird_to_observations

{'canada goose': 3, 'northern fulmar': 1}

In [32]:
bird_to_observations['northern fulmar']

1

In [33]:
bird_to_observations = {}
bird_to_observations['eagle'] = 999
bird_to_observations['snow goose'] = 33
bird_to_observations

{'eagle': 999, 'snow goose': 33}

In [34]:
bird_to_observations['eagle'] = 9
bird_to_observations

{'eagle': 9, 'snow goose': 33}

In [35]:
del bird_to_observations['snow goose']
bird_to_observations

{'eagle': 9}

In [36]:
bird_to_observations['snow goose'] = 9

In [37]:
'snow goose' in bird_to_observations

True

In [38]:
if 'eagle' in bird_to_observations:
    print('Eagles have been seen')

Eagles have been seen


In [39]:
bird_to_observations = {'canada goose': 183, 'long-tailed jaeger': 71,
                        'snow goose': 63, 'northern fulmar': 1}
for bird in bird_to_observations:
    print(bird, bird_to_observations[bird])

canada goose 183
long-tailed jaeger 71
snow goose 63
northern fulmar 1


In [40]:
scientist_to_birthdate = {'Newton' : 1642, 'Darwin' : 1809,'Turing' : 1912}
scientist_to_birthdate.keys()

dict_keys(['Newton', 'Darwin', 'Turing'])

In [41]:
scientist_to_birthdate.values()

dict_values([1642, 1809, 1912])

In [42]:
scientist_to_birthdate.items()

dict_items([('Newton', 1642), ('Darwin', 1809), ('Turing', 1912)])

In [43]:
scientist_to_birthdate.get('Newton')

1642

In [44]:
scientist_to_birthdate.get('Curie', 1867)

1867

In [45]:
researcher_to_birthdate = {'Curie' : 1867, 'Hopper' : 1906, 'Franklin' : 1920}
researcher_to_birthdate

{'Curie': 1867, 'Hopper': 1906, 'Franklin': 1920}

In [46]:
scientist_to_birthdate.update(researcher_to_birthdate)
scientist_to_birthdate

{'Newton': 1642,
 'Darwin': 1809,
 'Turing': 1912,
 'Curie': 1867,
 'Hopper': 1906,
 'Franklin': 1920}

In [47]:
researcher_to_birthdate

{'Curie': 1867, 'Hopper': 1906, 'Franklin': 1920}

In [48]:
researcher_to_birthdate.clear()

In [49]:
researcher_to_birthdate

{}

In [50]:
# bird observation using a dictionary
def count_birds(observation_file:TextIO)-> Dict[str, int]:
    """Return a set of the bird species listed in observations_file, which has one bird species per line.
    
    >>> infile = StringIO('bird 1\\nbird 2\\nbird 1\\n')
    >>> count_birds(infile)
    {'bird 1': 2, 'bird 2': 1}
    """
    
    bird_to_observations = {}
    
    for line in observation_file:
        bird = line.strip()
        if bird in bird_to_observations:
            bird_to_observations[bird] += 1
        else:
            bird_to_observations[bird] = 1
    return bird_to_observations

if __name__ == '__main__':
    with open('files/observations.txt', 'r') as file:
        bird_to_observations = count_birds(file)
        for bird, observations in bird_to_observations.items():
            print(bird, observations)
        testmod()

canada goose 5
long-tailed jaeger 2
snow goose 1
northern fulmar 1


In [51]:
observation_to_birds = {}
for bird, observation in bird_to_observations.items():
    if observation in observation_to_birds:
        observation_to_birds[observation].append(bird)
    else:
        observation_to_birds[observation] = [bird]
observation_to_birds

{5: ['canada goose'],
 2: ['long-tailed jaeger'],
 1: ['snow goose', 'northern fulmar']}

In [52]:
for key in sorted(observation_to_birds.keys()):
    print(key, ':', end=' ')
    for value in observation_to_birds[key]:
        print(value, end=' ')
    print()

1 : snow goose northern fulmar 
2 : long-tailed jaeger 
5 : canada goose 


In [53]:
'snow goose' in bird_to_observations

True

## Exercises

**Exercise 1**

Write a function called find_dups that takes a list of integers as its input 
argument and returns a set of those integers occurring two or more times in the list.

In [61]:
def find_dups(list_of_integers:list)-> Set:
    """Returns a set of numbers that appears more than once in a list of numbers
    
    >>> find_dups([1, 2, 1, 2, 3, 4, 6])
    {1, 2}
    >>> find_dups([1,2,3,4])
    set()
    """
    list_set = set()
    dup_set = set()
    for number in list_of_integers:
        len_initial = len(list_set)
        list_set.add(number)
        len_after = len(list_set)
        if len_initial == len_after:
            dup_set.add(number)
    return dup_set

list_of_integers = [1, 2, 1, 2, 3, 4, 6, 9, 9, 0, 0, 8, 6, 6, 9]
print(find_dups(list_of_integers))
testmod()

{0, 1, 2, 6, 9}


TestResults(failed=0, attempted=12)

**Exercise 3**

Python’s set objects have a method called pop that removes and returns an
arbitrary element from the set. If the set gerbils contains five cuddly little ani-
mals, for example, calling gerbils.pop() five times will return those animals one
by one, leaving the set empty at the end. Use this to write a function called
mating_pairs that takes two equal-sized sets called males and females as input and
returns a set of pairs; each pair must be a tuple containing one male and one
female. (The elements of males and females may be strings containing gerbil
names or gerbil ID numbers—your function must work with both.)

In [220]:
def mating_pairs(males:set, females:set)-> set :
    """ Takes two sets (males, females) of equal sizes as input values and returns a set of tuple; where each tuple
    contains one male and female value. if an empty set in passed it returns an empty set.
    
    >>> mating_pairs({'cat', 'dog', 'lion'}, {'tiger', 'hen', 'shark'})
    {('cat', 'shark'), ('lion', 'tiger'), ('dog', 'hen')}
    >>> mating_pairs(set(), set())
    set()
    """
    mating_set = set()
    for entry in range(len(males)):
        male, female = males.pop(), females.pop()
        mating_set.add((male, female))
    return mating_set    

print(mating_pairs({'cat', 'dog', 'lion'}, {'tiger', 'hen', 'shark'}))
testmod()

{('cat', 'shark'), ('lion', 'tiger'), ('dog', 'hen')}


TestResults(failed=0, attempted=14)

**Exercise 4**

The PDB file format is often used to store information about molecules. A
PDB file may contain zero or more lines that begin with the word AUTHOR (which
may be in uppercase, lowercase, or mixed case), followed by spaces or tabs,
followed by the name of the person who created the file. Write a function that
takes a list of filenames as an input argument and returns the set of all author
names found in those files.

In [371]:
def author_names(filenames:TextIO)-> set:
    """Takes in a PDB file and returns all the names of the authors contained in the file
    
    >>> infile = StringIO('author\\n :\\n kent,\\n andreas,\\n ')
    >>> author_names(infile)
    {'kent, andreas'}
    >>> infile = StringIO('author\\n :\\n kent,\\n ')
    >>> author_names(infile)
    {'kent'}
    >>> infile = StringIO('author\\n :\\n ')
    >>> author_names(infile)
    set()
    """
    name_set = set()
    for names in filenames:
        if names.lower().startswith('author'):   
            name = names[7:].strip()
            name_set.add(name)
    return name_set
    
if __name__ == '__main__':
    with open('files/authors.txt', 'r') as filename:
        filenames = filename.readlines()
        print(author_names(filenames))

{'ben, carlson', 'kent, bukky'}


In [337]:
with open('files/file_example.txt', 'r') as filename:
        filename = filename.readlines()
        for i in filename:
            i = i.strip()
            print(i[:6])

First 
Second
Third 
